aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Org
diff options
context:
space:
mode:
authordespresc <christian.j.j.despres@gmail.com>2019-11-04 16:12:37 -0500
committerJohn MacFarlane <jgm@berkeley.edu>2019-11-12 16:03:45 -0800
commit90e436d49604e3fd1ef9432fb23f6d7f6245c7fd (patch)
tree4e7f0692f989643189f1fc6786050d95e239a0ea /src/Text/Pandoc/Readers/Org
parentd3966372f5049eea56213b069fc4d70d8af9144c (diff)
downloadpandoc-90e436d49604e3fd1ef9432fb23f6d7f6245c7fd.tar.gz
Switch to new pandoc-types and use Text instead of String [API change].
PR #5884. + Use pandoc-types 1.20 and texmath 0.12. + Text is now used instead of String, with a few exceptions. + In the MediaBag module, some of the types using Strings were switched to use FilePath instead (not Text). + In the Parsing module, new parsers `manyChar`, `many1Char`, `manyTillChar`, `many1TillChar`, `many1Till`, `manyUntil`, `mantyUntilChar` have been added: these are like their unsuffixed counterparts but pack some or all of their output. + `glob` in Text.Pandoc.Class still takes String since it seems to be intended as an interface to Glob, which uses strings. It seems to be used only once in the package, in the EPUB writer, so that is not hard to change.
Diffstat (limited to 'src/Text/Pandoc/Readers/Org')
-rw-r--r--src/Text/Pandoc/Readers/Org/BlockStarts.hs25
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs219
-rw-r--r--src/Text/Pandoc/Readers/Org/DocumentTree.hs44
-rw-r--r--src/Text/Pandoc/Readers/Org/ExportSettings.hs40
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs154
-rw-r--r--src/Text/Pandoc/Readers/Org/Meta.hs72
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs27
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs27
-rw-r--r--src/Text/Pandoc/Readers/Org/Shared.hs50
9 files changed, 338 insertions, 320 deletions
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
index 58db4f46c..b4f3cc0d8 100644
--- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs
+++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.BlockStarts
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -25,6 +26,8 @@ module Text.Pandoc.Readers.Org.BlockStarts
import Prelude
import Control.Monad (void)
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.Pandoc.Readers.Org.Parsing
-- | Horizontal Line (five -- dashes or more)
@@ -49,15 +52,15 @@ gridTableStart :: Monad m => OrgParser m ()
gridTableStart = try $ skipSpaces <* char '+' <* char '-'
-latexEnvStart :: Monad m => OrgParser m String
+latexEnvStart :: Monad m => OrgParser m Text
latexEnvStart = try $
skipSpaces *> string "\\begin{"
*> latexEnvName
<* string "}"
<* blankline
where
- latexEnvName :: Monad m => OrgParser m String
- latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*")
+ latexEnvName :: Monad m => OrgParser m Text
+ latexEnvName = try $ mappend <$> many1Char alphaNum <*> option "" (textStr "*")
bulletListStart :: Monad m => OrgParser m Int
bulletListStart = try $ do
@@ -68,7 +71,7 @@ bulletListStart = try $ do
return (ind + 1)
genericListStart :: Monad m
- => OrgParser m String
+ => OrgParser m Text
-> OrgParser m Int
genericListStart listMarker = try $ do
ind <- length <$> many spaceChar
@@ -82,11 +85,11 @@ eol = void (char '\n')
orderedListStart :: Monad m => OrgParser m Int
orderedListStart = genericListStart orderedListMarker
-- Ordered list markers allowed in org-mode
- where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
+ where orderedListMarker = T.snoc <$> many1Char digit <*> oneOf ".)"
-drawerStart :: Monad m => OrgParser m String
+drawerStart :: Monad m => OrgParser m Text
drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline
- where drawerName = char ':' *> manyTill nonspaceChar (char ':')
+ where drawerName = char ':' *> manyTillChar nonspaceChar (char ':')
metaLineStart :: Monad m => OrgParser m ()
metaLineStart = try $ skipSpaces <* string "#+"
@@ -99,12 +102,12 @@ commentLineStart = try $
exampleLineStart :: Monad m => OrgParser m ()
exampleLineStart = () <$ try (skipSpaces *> string ": ")
-noteMarker :: Monad m => OrgParser m String
+noteMarker :: Monad m => OrgParser m Text
noteMarker = try $ do
char '['
- choice [ many1Till digit (char ']')
- , (++) <$> string "fn:"
- <*> many1Till (noneOf "\n\r\t ") (char ']')
+ choice [ many1TillChar digit (char ']')
+ , (<>) <$> textStr "fn:"
+ <*> many1TillChar (noneOf "\n\r\t ") (char ']')
]
-- | Succeeds if the parser is at the end of a block.
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index cba876f06..de51dec3d 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.Blocks
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -23,7 +24,7 @@ import Text.Pandoc.Readers.Org.Inlines
import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
-import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
+import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename,
originalLang, translateLang, exportsCode)
import Text.Pandoc.Builder (Blocks, Inlines)
@@ -33,11 +34,13 @@ import Text.Pandoc.Options
import Text.Pandoc.Shared (compactify, compactifyDL, safeRead)
import Control.Monad (foldM, guard, mzero, void)
-import Data.Char (isSpace, toLower, toUpper)
+import Data.Char (isSpace)
import Data.Default (Default)
-import Data.List (foldl', isPrefixOf)
+import Data.List (foldl')
import Data.Maybe (fromMaybe, isJust, isNothing)
+import Data.Text (Text)
+import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.Walk as Walk
@@ -90,10 +93,10 @@ horizontalRule = return B.horizontalRule <$ try hline
-- | Attributes that may be added to figures (like a name or caption).
data BlockAttributes = BlockAttributes
- { blockAttrName :: Maybe String
- , blockAttrLabel :: Maybe String
+ { blockAttrName :: Maybe Text
+ , blockAttrLabel :: Maybe Text
, blockAttrCaption :: Maybe (F Inlines)
- , blockAttrKeyValues :: [(String, String)]
+ , blockAttrKeyValues :: [(Text, Text)]
}
-- | Convert BlockAttributes into pandoc Attr
@@ -103,14 +106,14 @@ attrFromBlockAttributes BlockAttributes{..} =
ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues
classes = case lookup "class" blockAttrKeyValues of
Nothing -> []
- Just clsStr -> words clsStr
+ Just clsStr -> T.words clsStr
kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues
in (ident, classes, kv)
-stringyMetaAttribute :: Monad m => OrgParser m (String, String)
+stringyMetaAttribute :: Monad m => OrgParser m (Text, Text)
stringyMetaAttribute = try $ do
metaLineStart
- attrName <- map toUpper <$> many1Till nonspaceChar (char ':')
+ attrName <- T.toUpper <$> many1TillChar nonspaceChar (char ':')
skipSpaces
attrValue <- anyLine <|> ("" <$ newline)
return (attrName, attrValue)
@@ -129,8 +132,8 @@ blockAttributes = try $ do
let label = lookup "LABEL" kv
caption' <- case caption of
Nothing -> return Nothing
- Just s -> Just <$> parseFromString inlines (s ++ "\n")
- kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs
+ Just s -> Just <$> parseFromString inlines (s <> "\n")
+ kvAttrs' <- parseFromString keyValues . (<> "\n") $ fromMaybe mempty kvAttrs
return BlockAttributes
{ blockAttrName = name
, blockAttrLabel = label
@@ -138,31 +141,31 @@ blockAttributes = try $ do
, blockAttrKeyValues = kvAttrs'
}
where
- isBlockAttr :: String -> Bool
+ isBlockAttr :: Text -> Bool
isBlockAttr = flip elem
[ "NAME", "LABEL", "CAPTION"
, "ATTR_HTML", "ATTR_LATEX"
, "RESULTS"
]
- appendValues :: String -> Maybe String -> (String, String) -> Maybe String
+ appendValues :: Text -> Maybe Text -> (Text, Text) -> Maybe Text
appendValues attrName accValue (key, value) =
if key /= attrName
then accValue
else case accValue of
- Just acc -> Just $ acc ++ ' ':value
+ Just acc -> Just $ acc <> " " <> value
Nothing -> Just value
-- | Parse key-value pairs for HTML attributes
-keyValues :: Monad m => OrgParser m [(String, String)]
+keyValues :: Monad m => OrgParser m [(Text, Text)]
keyValues = try $
manyTill ((,) <$> key <*> value) newline
where
- key :: Monad m => OrgParser m String
- key = try $ skipSpaces *> char ':' *> many1 nonspaceChar
+ key :: Monad m => OrgParser m Text
+ key = try $ skipSpaces *> char ':' *> many1Char nonspaceChar
- value :: Monad m => OrgParser m String
- value = skipSpaces *> manyTill anyChar endOfValue
+ value :: Monad m => OrgParser m Text
+ value = skipSpaces *> manyTillChar anyChar endOfValue
endOfValue :: Monad m => OrgParser m ()
endOfValue =
@@ -180,7 +183,7 @@ orgBlock = try $ do
blockAttrs <- blockAttributes
blkType <- blockHeaderStart
($ blkType) $
- case map toLower blkType of
+ case T.toLower blkType of
"export" -> exportBlock
"comment" -> rawBlockLines (const mempty)
"html" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
@@ -194,13 +197,13 @@ orgBlock = try $ do
let (ident, classes, kv) = attrFromBlockAttributes blockAttrs
in fmap $ B.divWith (ident, classes ++ [blkType], kv)
where
- blockHeaderStart :: Monad m => OrgParser m String
+ blockHeaderStart :: Monad m => OrgParser m Text
blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord
- lowercase :: String -> String
- lowercase = map toLower
+ lowercase :: Text -> Text
+ lowercase = T.toLower
-exampleBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks)
+exampleBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks)
exampleBlock blockAttrs _label = do
skipSpaces
(classes, kv) <- switchesAsAttributes
@@ -210,54 +213,54 @@ exampleBlock blockAttrs _label = do
let codeBlck = B.codeBlockWith (id', "example":classes, kv) content
return . return $ codeBlck
-rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks)
+rawBlockLines :: Monad m => (Text -> F Blocks) -> Text -> OrgParser m (F Blocks)
rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType)
-parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks)
+parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> Text -> OrgParser m (F Blocks)
parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent)
where
parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks)
parsedBlockContent = try $ do
raw <- rawBlockContent blockType
- parseFromString blocks (raw ++ "\n")
+ parseFromString blocks (raw <> "\n")
-- | Read the raw string content of a block
-rawBlockContent :: Monad m => String -> OrgParser m String
+rawBlockContent :: Monad m => Text -> OrgParser m Text
rawBlockContent blockType = try $ do
blkLines <- manyTill rawLine blockEnder
tabLen <- getOption readerTabStop
trimP <- orgStateTrimLeadBlkIndent <$> getState
- let stripIndent strs = if trimP then map (drop (shortestIndent strs)) strs else strs
- (unlines
+ let stripIndent strs = if trimP then map (T.drop (shortestIndent strs)) strs else strs
+ (T.unlines
. stripIndent
. map (tabsToSpaces tabLen . commaEscaped)
$ blkLines)
<$ updateState (\s -> s { orgStateTrimLeadBlkIndent = True })
where
- rawLine :: Monad m => OrgParser m String
+ rawLine :: Monad m => OrgParser m Text
rawLine = try $ ("" <$ blankline) <|> anyLine
blockEnder :: Monad m => OrgParser m ()
blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType)
- shortestIndent :: [String] -> Int
- shortestIndent = foldr (min . length . takeWhile isSpace) maxBound
- . filter (not . null)
-
- tabsToSpaces :: Int -> String -> String
- tabsToSpaces _ [] = []
- tabsToSpaces tabLen cs'@(c:cs) =
- case c of
- ' ' -> ' ':tabsToSpaces tabLen cs
- '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs
- _ -> cs'
-
- commaEscaped :: String -> String
- commaEscaped (',':cs@('*':_)) = cs
- commaEscaped (',':cs@('#':'+':_)) = cs
- commaEscaped (' ':cs) = ' ':commaEscaped cs
- commaEscaped ('\t':cs) = '\t':commaEscaped cs
- commaEscaped cs = cs
+ shortestIndent :: [Text] -> Int
+ shortestIndent = foldr (min . T.length . T.takeWhile isSpace) maxBound
+ . filter (not . T.null)
+
+ tabsToSpaces :: Int -> Text -> Text
+ tabsToSpaces tabStop t =
+ let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t
+ tabNum = T.length $ T.filter (== '\n') ind
+ spaceNum = T.length ind - tabNum
+ in T.replicate (spaceNum + tabStop * tabNum) " " <> suff
+
+ commaEscaped t =
+ let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t
+ in case T.uncons suff of
+ Just (',', cs)
+ | "*" <- T.take 1 cs -> ind <> cs
+ | "#+" <- T.take 2 cs -> ind <> cs
+ _ -> t
-- | Read but ignore all remaining block headers.
ignHeaders :: Monad m => OrgParser m ()
@@ -265,34 +268,34 @@ ignHeaders = (() <$ newline) <|> (() <$ anyLine)
-- | Read a block containing code intended for export in specific backends
-- only.
-exportBlock :: Monad m => String -> OrgParser m (F Blocks)
+exportBlock :: Monad m => Text -> OrgParser m (F Blocks)
exportBlock blockType = try $ do
exportType <- skipSpaces *> orgArgWord <* ignHeaders
contents <- rawBlockContent blockType
- returnF (B.rawBlock (map toLower exportType) contents)
+ returnF (B.rawBlock (T.toLower exportType) contents)
-verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks)
+verseBlock :: PandocMonad m => Text -> OrgParser m (F Blocks)
verseBlock blockType = try $ do
ignHeaders
content <- rawBlockContent blockType
fmap B.lineBlock . sequence
- <$> mapM parseVerseLine (lines content)
+ <$> mapM parseVerseLine (T.lines content)
where
-- replace initial spaces with nonbreaking spaces to preserve
-- indentation, parse the rest as normal inline
- parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines)
+ parseVerseLine :: PandocMonad m => Text -> OrgParser m (F Inlines)
parseVerseLine cs = do
- let (initialSpaces, indentedLine) = span isSpace cs
- let nbspIndent = if null initialSpaces
+ let (initialSpaces, indentedLine) = T.span isSpace cs
+ let nbspIndent = if T.null initialSpaces
then mempty
- else B.str $ map (const '\160') initialSpaces
- line <- parseFromString inlines (indentedLine ++ "\n")
+ else B.str $ T.map (const '\160') initialSpaces
+ line <- parseFromString inlines (indentedLine <> "\n")
return (trimInlinesF $ pure nbspIndent <> line)
-- | Read a code block and the associated results block if present. Which of
-- boths blocks is included in the output is determined using the "exports"
-- argument in the block header.
-codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks)
+codeBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks)
codeBlock blockAttrs blockType = do
skipSpaces
(classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
@@ -314,7 +317,7 @@ codeBlock blockAttrs blockType = do
labelledBlock :: F Inlines -> F Blocks
labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
- exportsResults :: [(String, String)] -> Bool
+ exportsResults :: [(Text, Text)] -> Bool
exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports"
-- | Parse the result of an evaluated babel code block.
@@ -329,7 +332,7 @@ babelResultsBlock = try $ do
resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline
-- | Parse code block arguments
-codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)])
+codeHeaderArgs :: Monad m => OrgParser m ([Text], [(Text, Text)])
codeHeaderArgs = try $ do
language <- skipSpaces *> orgArgWord
(switchClasses, switchKv) <- switchesAsAttributes
@@ -338,14 +341,14 @@ codeHeaderArgs = try $ do
, originalLang language <> switchKv <> parameters
)
-switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)])
+switchesAsAttributes :: Monad m => OrgParser m ([Text], [(Text, Text)])
switchesAsAttributes = try $ do
switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar)
return $ foldr addToAttr ([], []) switches
where
- addToAttr :: (Char, Maybe String, SwitchPolarity)
- -> ([String], [(String, String)])
- -> ([String], [(String, String)])
+ addToAttr :: (Char, Maybe Text, SwitchPolarity)
+ -> ([Text], [(Text, Text)])
+ -> ([Text], [(Text, Text)])
addToAttr ('n', lineNum, pol) (cls, kv) =
let kv' = case lineNum of
Just num -> ("startFrom", num):kv
@@ -365,15 +368,15 @@ switchPolarity :: Monad m => OrgParser m SwitchPolarity
switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+')
-- | Parses a source block switch option.
-switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
+switch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity)
switch = try $ lineNumberSwitch <|> labelSwitch
<|> whitespaceSwitch <|> simpleSwitch
where
simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter
labelSwitch = genericSwitch 'l' $
- char '"' *> many1Till nonspaceChar (char '"')
+ char '"' *> many1TillChar nonspaceChar (char '"')
-whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
+whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity)
whitespaceSwitch = do
string "-i"
updateState $ \s -> s { orgStateTrimLeadBlkIndent = False }
@@ -382,8 +385,8 @@ whitespaceSwitch = do
-- | Generic source block switch-option parser.
genericSwitch :: Monad m
=> Char
- -> OrgParser m String
- -> OrgParser m (Char, Maybe String, SwitchPolarity)
+ -> OrgParser m Text
+ -> OrgParser m (Char, Maybe Text, SwitchPolarity)
genericSwitch c p = try $ do
polarity <- switchPolarity <* char c <* skipSpaces
arg <- optionMaybe p
@@ -391,17 +394,17 @@ genericSwitch c p = try $ do
-- | Reads a line number switch option. The line number switch can be used with
-- example and source blocks.
-lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
-lineNumberSwitch = genericSwitch 'n' (many digit)
+lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity)
+lineNumberSwitch = genericSwitch 'n' (manyChar digit)
-blockOption :: Monad m => OrgParser m (String, String)
+blockOption :: Monad m => OrgParser m (Text, Text)
blockOption = try $ do
argKey <- orgArgKey
paramValue <- option "yes" orgParamValue
return (argKey, paramValue)
-orgParamValue :: Monad m => OrgParser m String
-orgParamValue = try $
+orgParamValue :: Monad m => OrgParser m Text
+orgParamValue = try $ fmap T.pack $
skipSpaces
*> notFollowedBy orgArgKey
*> noneOf "\n\r" `many1Till` endOfValue
@@ -420,7 +423,7 @@ orgParamValue = try $
-- export setting.
genericDrawer :: PandocMonad m => OrgParser m (F Blocks)
genericDrawer = try $ do
- name <- map toUpper <$> drawerStart
+ name <- T.toUpper <$> drawerStart
content <- manyTill drawerLine (try drawerEnd)
state <- getState
-- Include drawer if it is explicitly included in or not explicitly excluded
@@ -432,16 +435,16 @@ genericDrawer = try $ do
Right names | name `notElem` names -> return mempty
_ -> drawerDiv name <$> parseLines content
where
- parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks)
- parseLines = parseFromString blocks . (++ "\n") . unlines
+ parseLines :: PandocMonad m => [Text] -> OrgParser m (F Blocks)
+ parseLines = parseFromString blocks . (<> "\n") . T.unlines
- drawerDiv :: String -> F Blocks -> F Blocks
+ drawerDiv :: Text -> F Blocks -> F Blocks
drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
-drawerLine :: Monad m => OrgParser m String
+drawerLine :: Monad m => OrgParser m Text
drawerLine = anyLine
-drawerEnd :: Monad m => OrgParser m String
+drawerEnd :: Monad m => OrgParser m Text
drawerEnd = try $
skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
@@ -456,17 +459,17 @@ figure :: PandocMonad m => OrgParser m (F Blocks)
figure = try $ do
figAttrs <- blockAttributes
src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph
- case cleanLinkString src of
+ case cleanLinkText src of
Nothing -> mzero
Just imgSrc -> do
guard (isImageFilename imgSrc)
let isFigure = isJust $ blockAttrCaption figAttrs
return $ imageBlock isFigure figAttrs imgSrc
where
- selfTarget :: PandocMonad m => OrgParser m String
+ selfTarget :: PandocMonad m => OrgParser m Text
selfTarget = try $ char '[' *> linkTarget <* char ']'
- imageBlock :: Bool -> BlockAttributes -> String -> F Blocks
+ imageBlock :: Bool -> BlockAttributes -> Text -> F Blocks
imageBlock isFigure figAttrs imgSrc =
let
figName = fromMaybe mempty $ blockAttrName figAttrs
@@ -478,11 +481,11 @@ figure = try $ do
in
B.para . B.imageWith attr imgSrc figTitle <$> figCaption
- withFigPrefix :: String -> String
+ withFigPrefix :: Text -> Text
withFigPrefix cs =
- if "fig:" `isPrefixOf` cs
+ if "fig:" `T.isPrefixOf` cs
then cs
- else "fig:" ++ cs
+ else "fig:" <> cs
-- | Succeeds if looking at the end of the current paragraph
endOfParagraph :: Monad m => OrgParser m ()
@@ -495,12 +498,12 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock
-- | Example code marked up by a leading colon.
example :: Monad m => OrgParser m (F Blocks)
-example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine
+example = try $ returnF . exampleCode =<< T.unlines <$> many1 exampleLine
where
- exampleLine :: Monad m => OrgParser m String
+ exampleLine :: Monad m => OrgParser m Text
exampleLine = try $ exampleLineStart *> anyLine
-exampleCode :: String -> Blocks
+exampleCode :: Text -> Blocks
exampleCode = B.codeBlockWith ("", ["example"], [])
@@ -516,7 +519,7 @@ include :: PandocMonad m => OrgParser m (F Blocks)
include = try $ do
metaLineStart <* stringAnyCase "include:" <* skipSpaces
filename <- includeTarget
- includeArgs <- many (try $ skipSpaces *> many1 alphaNum)
+ includeArgs <- many (try $ skipSpaces *> many1Char alphaNum)
params <- keyValues
blocksParser <- case includeArgs of
("example" : _) -> return $ pure . B.codeBlock <$> parseRaw
@@ -535,10 +538,10 @@ include = try $ do
char '"'
manyTill (noneOf "\n\r\t") (char '"')
- parseRaw :: PandocMonad m => OrgParser m String
- parseRaw = many anyChar
+ parseRaw :: PandocMonad m => OrgParser m Text
+ parseRaw = manyChar anyChar
- blockFilter :: [(String, String)] -> [Block] -> [Block]
+ blockFilter :: [(Text, Text)] -> [Block] -> [Block]
blockFilter params blks =
let minlvl = lookup "minlevel" params
in case (minlvl >>= safeRead :: Maybe Int) of
@@ -660,7 +663,7 @@ columnPropertyCell = emptyCell <|> propCell <?> "alignment info"
<$> (skipSpaces
*> char '<'
*> optionMaybe tableAlignFromChar)
- <*> (optionMaybe (many1 digit >>= safeRead)
+ <*> (optionMaybe (many1Char digit >>= safeRead)
<* char '>'
<* emptyCell)
@@ -739,10 +742,10 @@ latexFragment = try $ do
, "\\end{", e, "}\n"
]
-latexEnd :: Monad m => String -> OrgParser m ()
+latexEnd :: Monad m => Text -> OrgParser m ()
latexEnd envName = try $
() <$ skipSpaces
- <* string ("\\end{" ++ envName ++ "}")
+ <* textStr ("\\end{" <> envName <> "}")
<* blankline
@@ -813,12 +816,12 @@ definitionListItem :: PandocMonad m
-> OrgParser m (F (Inlines, [Blocks]))
definitionListItem parseIndentedMarker = try $ do
markerLength <- parseIndentedMarker
- term <- manyTill (noneOf "\n\r") (try definitionMarker)
+ term <- manyTillChar (noneOf "\n\r") (try definitionMarker)
line1 <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
- cont <- concat <$> many (listContinuation markerLength)
+ cont <- T.concat <$> many (listContinuation markerLength)
term' <- parseFromString inlines term
- contents' <- parseFromString blocks $ line1 ++ blank ++ cont
+ contents' <- parseFromString blocks $ line1 <> blank <> cont
return $ (,) <$> term' <*> fmap (:[]) contents'
where
definitionMarker =
@@ -832,16 +835,16 @@ listItem parseIndentedMarker = try . withContext ListItemState $ do
markerLength <- try parseIndentedMarker
firstLine <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
- rest <- concat <$> many (listContinuation markerLength)
- parseFromString blocks $ firstLine ++ blank ++ rest
+ rest <- T.concat <$> many (listContinuation markerLength)
+ parseFromString blocks $ firstLine <> blank <> rest
-- continuation of a list item - indented and separated by blankline or endline.
-- Note: nested lists are parsed as continuations.
-listContinuation :: PandocMonad m => Int -> OrgParser m String
+listContinuation :: PandocMonad m => Int -> OrgParser m Text
listContinuation markerLength = try $ do
notFollowedBy' blankline
- mappend <$> (concat <$> many1 (listContinuation' markerLength))
- <*> many blankline
+ mappend <$> (T.concat <$> many1 (listContinuation' markerLength))
+ <*> manyChar blankline
where
listContinuation' indentation =
blockLines indentation <|> listLine indentation
@@ -853,6 +856,6 @@ listContinuation markerLength = try $ do
>> blockAttributes
>>= (\blockAttrs ->
case attrFromBlockAttributes blockAttrs of
- ("", [], []) -> count 1 anyChar
+ ("", [], []) -> countChar 1 anyChar
_ -> indentWith indentation))
>> (snd <$> withRaw orgBlock)
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index c96087be7..09a501b68 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -1,5 +1,6 @@
-{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.DocumentTree
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -17,9 +18,9 @@ module Text.Pandoc.Readers.Org.DocumentTree
import Prelude
import Control.Arrow ((***))
import Control.Monad (guard, void)
-import Data.Char (toLower, toUpper)
import Data.List (intersperse)
import Data.Maybe (mapMaybe)
+import Data.Text (Text)
import Text.Pandoc.Builder (Blocks, Inlines)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
@@ -28,6 +29,7 @@ import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import qualified Data.Set as Set
+import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
--
@@ -59,28 +61,28 @@ documentTree blocks inline = do
}
-- | Create a tag containing the given string.
-toTag :: String -> Tag
+toTag :: Text -> Tag
toTag = Tag
-- | The key (also called name or type) of a property.
-newtype PropertyKey = PropertyKey { fromKey :: String }
+newtype PropertyKey = PropertyKey { fromKey :: Text }
deriving (Show, Eq, Ord)
-- | Create a property key containing the given string. Org mode keys are
-- case insensitive and are hence converted to lower case.
-toPropertyKey :: String -> PropertyKey
-toPropertyKey = PropertyKey . map toLower
+toPropertyKey :: Text -> PropertyKey
+toPropertyKey = PropertyKey . T.toLower
-- | The value assigned to a property.
-newtype PropertyValue = PropertyValue { fromValue :: String }
+newtype PropertyValue = PropertyValue { fromValue :: Text }
-- | Create a property value containing the given string.
-toPropertyValue :: String -> PropertyValue
+toPropertyValue :: Text -> PropertyValue
toPropertyValue = PropertyValue
-- | Check whether the property value is non-nil (i.e. truish).
isNonNil :: PropertyValue -> Bool
-isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"]
+isNonNil p = T.toLower (fromValue p) `notElem` ["()", "{}", "nil"]
-- | Key/value pairs from a PROPERTIES drawer
type Properties = [(PropertyKey, PropertyValue)]
@@ -273,7 +275,7 @@ headlineToHeader hdln = do
todoKeyword :: Monad m => OrgParser m TodoMarker
todoKeyword = try $ do
taskStates <- activeTodoMarkers <$> getState
- let kwParser tdm = try (tdm <$ string (todoMarkerName tdm)
+ let kwParser tdm = try (tdm <$ textStr (todoMarkerName tdm)
<* spaceChar
<* updateLastPreCharPos)
choice (map kwParser taskStates)
@@ -281,26 +283,26 @@ todoKeyword = try $ do
todoKeywordToInlines :: TodoMarker -> Inlines
todoKeywordToInlines tdm =
let todoText = todoMarkerName tdm
- todoState = map toLower . show $ todoMarkerState tdm
+ todoState = T.toLower . T.pack . show $ todoMarkerState tdm
classes = [todoState, todoText]
in B.spanWith (mempty, classes, mempty) (B.str todoText)
propertiesToAttr :: Properties -> Attr
propertiesToAttr properties =
let
- toStringPair = fromKey *** fromValue
+ toTextPair = fromKey *** fromValue
customIdKey = toPropertyKey "custom_id"
classKey = toPropertyKey "class"
unnumberedKey = toPropertyKey "unnumbered"
specialProperties = [customIdKey, classKey, unnumberedKey]
id' = maybe mempty fromValue . lookup customIdKey $ properties
cls = maybe mempty fromValue . lookup classKey $ properties
- kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst)
+ kvs' = map toTextPair . filter ((`notElem` specialProperties) . fst)
$ properties
isUnnumbered =
maybe False isNonNil . lookup unnumberedKey $ properties
in
- (id', words cls ++ ["unnumbered" | isUnnumbered], kvs')
+ (id', T.words cls ++ ["unnumbered" | isUnnumbered], kvs')
tagsToInlines :: [Tag] -> Inlines
tagsToInlines [] = mempty
@@ -336,15 +338,15 @@ planningToBlock planning = do
<> B.emph (B.str time)
-- | An Org timestamp, including repetition marks. TODO: improve
-type Timestamp = String
+type Timestamp = Text
timestamp :: Monad m => OrgParser m Timestamp
timestamp = try $ do
openChar <- oneOf "<["
let isActive = openChar == '<'
let closeChar = if isActive then '>' else ']'
- content <- many1Till anyChar (char closeChar)
- return (openChar : content ++ [closeChar])
+ content <- many1TillChar anyChar (char closeChar)
+ return $ T.cons openChar $ content `T.snoc` closeChar
-- | Planning information for a subtree/headline.
data PlanningInfo = PlanningInfo
@@ -374,7 +376,7 @@ planningInfo = try $ do
propertiesDrawer :: Monad m => OrgParser m Properties
propertiesDrawer = try $ do
drawerType <- drawerStart
- guard $ map toUpper drawerType == "PROPERTIES"
+ guard $ T.toUpper drawerType == "PROPERTIES"
manyTill property (try endOfDrawer)
where
property :: Monad m => OrgParser m (PropertyKey, PropertyValue)
@@ -382,12 +384,12 @@ propertiesDrawer = try $ do
key :: Monad m => OrgParser m PropertyKey
key = fmap toPropertyKey . try $
- skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
+ skipSpaces *> char ':' *> many1TillChar nonspaceChar (char ':')
value :: Monad m => OrgParser m PropertyValue
value = fmap toPropertyValue . try $
- skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
+ skipSpaces *> manyTillChar anyChar (try $ skipSpaces *> newline)
- endOfDrawer :: Monad m => OrgParser m String
+ endOfDrawer :: Monad m => OrgParser m Text
endOfDrawer = try $
skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
index f783eaa0f..f1f089273 100644
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.ExportSettings
Copyright : © 2016–2019 Albert Krewinkel
@@ -21,6 +22,7 @@ import Text.Pandoc.Readers.Org.Parsing
import Control.Monad (mzero, void)
import Data.Char (toLower)
import Data.Maybe (listToMaybe)
+import Data.Text (Text)
-- | Read and handle space separated org-mode export settings.
exportSettings :: PandocMonad m => OrgParser m ()
@@ -70,11 +72,11 @@ exportSetting = choice
genericExportSetting :: Monad m
=> OrgParser m a
- -> String
+ -> Text
-> ExportSettingSetter a
-> OrgParser m ()
genericExportSetting optionParser settingIdentifier setter = try $ do
- _ <- string settingIdentifier *> char ':'
+ _ <- textStr settingIdentifier *> char ':'
value <- optionParser
updateState $ modifyExportSettings value
where
@@ -82,11 +84,11 @@ genericExportSetting optionParser settingIdentifier setter = try $ do
st { orgStateExportSettings = setter val . orgStateExportSettings $ st }
-- | A boolean option, either nil (False) or non-nil (True).
-booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m ()
+booleanSetting :: Monad m => Text -> ExportSettingSetter Bool -> OrgParser m ()
booleanSetting = genericExportSetting elispBoolean
-- | An integer-valued option.
-integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m ()
+integerSetting :: Monad m => Text -> ExportSettingSetter Int -> OrgParser m ()
integerSetting = genericExportSetting parseInt
where
parseInt = try $
@@ -95,7 +97,7 @@ integerSetting = genericExportSetting parseInt
-- | Either the string "headline" or an elisp boolean and treated as an
-- @ArchivedTreesOption@.
archivedTreeSetting :: Monad m
- => String
+ => Text
-> ExportSettingSetter ArchivedTreesOption
-> OrgParser m ()
archivedTreeSetting =
@@ -115,42 +117,42 @@ archivedTreeSetting =
-- | A list or a complement list (i.e. a list starting with `not`).
complementableListSetting :: Monad m
- => String
- -> ExportSettingSetter (Either [String] [String])
+ => Text
+ -> ExportSettingSetter (Either [Text] [Text])
-> OrgParser m ()
complementableListSetting = genericExportSetting $ choice
- [ Left <$> complementStringList
+ [ Left <$> complementTextList
, Right <$> stringList
, (\b -> if b then Left [] else Right []) <$> elispBoolean
]
where
-- Read a plain list of strings.
- stringList :: Monad m => OrgParser m [String]
+ stringList :: Monad m => OrgParser m [Text]
stringList = try $
char '('
- *> sepBy elispString spaces
+ *> sepBy elispText spaces
<* char ')'
-- Read an emacs lisp list specifying a complement set.
- complementStringList :: Monad m => OrgParser m [String]
- complementStringList = try $
+ complementTextList :: Monad m => OrgParser m [Text]
+ complementTextList = try $
string "(not "
- *> sepBy elispString spaces
+ *> sepBy elispText spaces
<* char ')'
- elispString :: Monad m => OrgParser m String
- elispString = try $
+ elispText :: Monad m => OrgParser m Text
+ elispText = try $
char '"'
- *> manyTill alphaNum (char '"')
+ *> manyTillChar alphaNum (char '"')
-- | Read but ignore the export setting.
-ignoredSetting :: Monad m => String -> OrgParser m ()
-ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar)
+ignoredSetting :: Monad m => Text -> OrgParser m ()
+ignoredSetting s = try (() <$ textStr s <* char ':' <* many1 nonspaceChar)
-- | Read any setting string, but ignore it and emit a warning.
ignoreAndWarn :: PandocMonad m => OrgParser m ()
ignoreAndWarn = try $ do
- opt <- many1 nonspaceChar
+ opt <- many1Char nonspaceChar
report (UnknownOrgExportOption opt)
return ()
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index cae590c5f..da638f717 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -20,7 +20,7 @@ import Prelude
import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
-import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
+import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename,
originalLang, translateLang, exportsCode)
import Text.Pandoc.Builder (Inlines)
@@ -38,12 +38,14 @@ import Control.Monad.Trans (lift)
import Data.Char (isAlphaNum, isSpace)
import Data.List (intersperse)
import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
import Data.Maybe (fromMaybe)
--
-- Functions acting on the parser state
--
-recordAnchorId :: PandocMonad m => String -> OrgParser m ()
+recordAnchorId :: PandocMonad m => Text -> OrgParser m ()
recordAnchorId i = updateState $ \s ->
s{ orgStateAnchorIds = i : orgStateAnchorIds s }
@@ -127,7 +129,7 @@ linebreak :: PandocMonad m => OrgParser m (F Inlines)
linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
str :: PandocMonad m => OrgParser m (F Inlines)
-str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+str = return . B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ")
<* updateLastStrPos
-- | An endline character that can be treated as a space, not a structural
@@ -321,7 +323,7 @@ linkLikeOrgRefCite = try $ do
-- | Read a citation key. The characters allowed in citation keys are taken
-- from the `org-ref-cite-re` variable in `org-ref.el`.
-orgRefCiteKey :: PandocMonad m => OrgParser m String
+orgRefCiteKey :: PandocMonad m => OrgParser m Text
orgRefCiteKey =
let citeKeySpecialChars = "-_:\\./," :: String
isCiteKeySpecialChar c = c `elem` citeKeySpecialChars
@@ -329,7 +331,7 @@ orgRefCiteKey =
endOfCitation = try $ do
many $ satisfy isCiteKeySpecialChar
satisfy $ not . isCiteKeyChar
- in try $ satisfy isCiteKeyChar `many1Till` lookAhead endOfCitation
+ in try $ satisfy isCiteKeyChar `many1TillChar` lookAhead endOfCitation
-- | Supported citation types. Only a small subset of org-ref types is
@@ -384,11 +386,11 @@ footnote = try $ inlineNote <|> referencedNote
inlineNote :: PandocMonad m => OrgParser m (F Inlines)
inlineNote = try $ do
string "[fn:"
- ref <- many alphaNum
+ ref <- manyChar alphaNum
char ':'
note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
- unless (null ref) $
- addToNotesTable ("fn:" ++ ref, note)
+ unless (T.null ref) $
+ addToNotesTable ("fn:" <> ref, note)
return $ B.note <$> note
referencedNote :: PandocMonad m => OrgParser m (F Inlines)
@@ -397,7 +399,7 @@ referencedNote = try $ do
return $ do
notes <- asksF orgStateNotes'
case lookup ref notes of
- Nothing -> return . B.str $ "[" ++ ref ++ "]"
+ Nothing -> return . B.str $ "[" <> ref <> "]"
Just contents -> do
st <- askF
let contents' = runF contents st{ orgStateNotes' = [] }
@@ -420,7 +422,7 @@ explicitOrImageLink = try $ do
return $ do
src <- srcF
title <- titleF
- case cleanLinkString descr of
+ case cleanLinkText descr of
Just imgSrc | isImageFilename imgSrc ->
return . B.link src "" $ B.image imgSrc mempty mempty
_ ->
@@ -429,10 +431,10 @@ explicitOrImageLink = try $ do
selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines)
selflinkOrImage = try $ do
target <- char '[' *> linkTarget <* char ']'
- case cleanLinkString target of
- Nothing -> case target of
- '#':_ -> returnF $ B.link target "" (B.str target)
- _ -> return $ internalLink target (B.str target)
+ case cleanLinkText target of
+ Nothing -> case T.uncons target of
+ Just ('#', _) -> returnF $ B.link target "" (B.str target)
+ _ -> return $ internalLink target (B.str target)
Just nonDocTgt -> if isImageFilename nonDocTgt
then returnF $ B.image nonDocTgt "" ""
else returnF $ B.link nonDocTgt "" (B.str target)
@@ -449,35 +451,35 @@ angleLink = try $ do
char '>'
return link
-linkTarget :: PandocMonad m => OrgParser m String
-linkTarget = enclosedByPair1 '[' ']' (noneOf "\n\r[]")
+linkTarget :: PandocMonad m => OrgParser m Text
+linkTarget = T.pack <$> enclosedByPair1 '[' ']' (noneOf "\n\r[]")
-possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String
+possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m Text
possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
-applyCustomLinkFormat :: String -> OrgParser m (F String)
+applyCustomLinkFormat :: Text -> OrgParser m (F Text)
applyCustomLinkFormat link = do
- let (linkType, rest) = break (== ':') link
+ let (linkType, rest) = T.break (== ':') link
return $ do
formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
- return $ maybe link ($ drop 1 rest) formatter
+ return $ maybe link ($ T.drop 1 rest) formatter
-- | Take a link and return a function which produces new inlines when given
-- description inlines.
-linkToInlinesF :: String -> Inlines -> F Inlines
+linkToInlinesF :: Text -> Inlines -> F Inlines
linkToInlinesF linkStr =
- case linkStr of
- "" -> pure . B.link mempty "" -- wiki link (empty by convention)
- ('#':_) -> pure . B.link linkStr "" -- document-local fraction
- _ -> case cleanLinkString linkStr of
- Just extTgt -> return . B.link extTgt ""
- Nothing -> internalLink linkStr -- other internal link
-
-internalLink :: String -> Inlines -> F Inlines
+ case T.uncons linkStr of
+ Nothing -> pure . B.link mempty "" -- wiki link (empty by convention)
+ Just ('#', _) -> pure . B.link linkStr "" -- document-local fraction
+ _ -> case cleanLinkText linkStr of
+ Just extTgt -> return . B.link extTgt ""
+ Nothing -> internalLink linkStr -- other internal link
+
+internalLink :: Text -> Inlines -> F Inlines
internalLink link title = do
anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
if anchorB
- then return $ B.link ('#':link) "" title
+ then return $ B.link ("#" <> link) "" title
else return $ B.emph title
-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
@@ -493,15 +495,15 @@ anchor = try $ do
returnF $ B.spanWith (solidify anchorId, [], []) mempty
where
parseAnchor = string "<<"
- *> many1 (noneOf "\t\n\r<>\"' ")
+ *> many1Char (noneOf "\t\n\r<>\"' ")
<* string ">>"
<* skipSpaces
-- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors
-- the org function @org-export-solidify-link-text@.
-solidify :: String -> String
-solidify = map replaceSpecialChar
+solidify :: Text -> Text
+solidify = T.map replaceSpecialChar
where replaceSpecialChar c
| isAlphaNum c = c
| c `elem` ("_.-:" :: String) = c
@@ -511,25 +513,25 @@ solidify = map replaceSpecialChar
inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines)
inlineCodeBlock = try $ do
string "src_"
- lang <- many1 orgArgWordChar
+ lang <- many1Char orgArgWordChar
opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
- inlineCode <- enclosedByPair1 '{' '}' (noneOf "\n\r")
+ inlineCode <- T.pack <$> enclosedByPair1 '{' '}' (noneOf "\n\r")
let attrClasses = [translateLang lang]
let attrKeyVal = originalLang lang <> opts
let codeInlineBlck = B.codeWith ("", attrClasses, attrKeyVal) inlineCode
returnF $ if exportsCode opts then codeInlineBlck else mempty
where
- inlineBlockOption :: PandocMonad m => OrgParser m (String, String)
+ inlineBlockOption :: PandocMonad m => OrgParser m (Text, Text)
inlineBlockOption = try $ do
argKey <- orgArgKey
paramValue <- option "yes" orgInlineParamValue
return (argKey, paramValue)
- orgInlineParamValue :: PandocMonad m => OrgParser m String
+ orgInlineParamValue :: PandocMonad m => OrgParser m Text
orgInlineParamValue = try $
skipSpaces
*> notFollowedBy (char ':')
- *> many1 (noneOf "\t\n\r ]")
+ *> many1Char (noneOf "\t\n\r ]")
<* skipSpaces
@@ -584,7 +586,7 @@ superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
math :: PandocMonad m => OrgParser m (F Inlines)
math = return . B.math <$> choice [ math1CharBetween '$'
- , mathStringBetween '$'
+ , mathTextBetween '$'
, rawMathBetween "\\(" "\\)"
]
@@ -604,7 +606,7 @@ updatePositions c = do
return c
symbol :: PandocMonad m => OrgParser m (F Inlines)
-symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
+symbol = return . B.str . T.singleton <$> (oneOf specialChars >>= updatePositions)
emphasisBetween :: PandocMonad m
=> Char
@@ -619,7 +621,7 @@ emphasisBetween c = try $ do
verbatimBetween :: PandocMonad m
=> Char
- -> OrgParser m String
+ -> OrgParser m Text
verbatimBetween c = try $
emphasisStart c *>
many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c)
@@ -627,33 +629,33 @@ verbatimBetween c = try $
verbatimChar = noneOf "\n\r" >>= updatePositions
-- | Parses a raw string delimited by @c@ using Org's math rules
-mathStringBetween :: PandocMonad m
+mathTextBetween :: PandocMonad m
=> Char
- -> OrgParser m String
-mathStringBetween c = try $ do
+ -> OrgParser m Text
+mathTextBetween c = try $ do
mathStart c
body <- many1TillNOrLessNewlines mathAllowedNewlines
(noneOf (c:"\n\r"))
(lookAhead $ mathEnd c)
final <- mathEnd c
- return $ body ++ [final]
+ return $ T.snoc body final
-- | Parse a single character between @c@ using math rules
math1CharBetween :: PandocMonad m
=> Char
- -> OrgParser m String
+ -> OrgParser m Text
math1CharBetween c = try $ do
char c
res <- noneOf $ c:mathForbiddenBorderChars
char c
eof <|> () <$ lookAhead (oneOf mathPostChars)
- return [res]
+ return $ T.singleton res
rawMathBetween :: PandocMonad m
- => String
- -> String
- -> OrgParser m String
-rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
+ => Text
+ -> Text
+ -> OrgParser m Text
+rawMathBetween s e = try $ textStr s *> manyTillChar anyChar (try $ textStr e)
-- | Parses the start (opening character) of emphasis
emphasisStart :: PandocMonad m => Char -> OrgParser m Char
@@ -702,10 +704,10 @@ enclosedInlines start end = try $
enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a
-> OrgParser m b
- -> OrgParser m String
+ -> OrgParser m Text
enclosedRaw start end = try $
start *> (onSingleLine <|> spanningTwoLines)
- where onSingleLine = try $ many1Till (noneOf "\n\r") end
+ where onSingleLine = try $ many1TillChar (noneOf "\n\r") end
spanningTwoLines = try $
anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
@@ -714,7 +716,7 @@ enclosedRaw start end = try $
many1TillNOrLessNewlines :: PandocMonad m => Int
-> OrgParser m Char
-> OrgParser m a
- -> OrgParser m String
+ -> OrgParser m Text
many1TillNOrLessNewlines n p end = try $
nMoreLines (Just n) mempty >>= oneOrMore
where
@@ -726,7 +728,7 @@ many1TillNOrLessNewlines n p end = try $
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
finalLine = try $ manyTill p end
minus1 k = k - 1
- oneOrMore cs = cs <$ guard (not $ null cs)
+ oneOrMore cs = T.pack cs <$ guard (not $ null cs)
-- Org allows customization of the way it reads emphasis. We use the defaults
-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
@@ -773,17 +775,17 @@ subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines)
subOrSuperExpr = try $
choice [ charsInBalanced '{' '}' (noneOf "\n\r")
, enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
- , simpleSubOrSuperString
+ , simpleSubOrSuperText
] >>= parseFromString (mconcat <$> many inline)
- where enclosing (left, right) s = left : s ++ [right]
+ where enclosing (left, right) s = T.cons left $ T.snoc s right
-simpleSubOrSuperString :: PandocMonad m => OrgParser m String
-simpleSubOrSuperString = try $ do
+simpleSubOrSuperText :: PandocMonad m => OrgParser m Text
+simpleSubOrSuperText = try $ do
state <- getState
guard . exportSubSuperscripts . orgStateExportSettings $ state
- choice [ string "*"
- , mappend <$> option [] ((:[]) <$> oneOf "+-")
- <*> many1 alphaNum
+ choice [ textStr "*"
+ , mappend <$> option "" (T.singleton <$> oneOf "+-")
+ <*> many1Char alphaNum
]
inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines)
@@ -793,28 +795,28 @@ inlineLaTeX = try $ do
maybe mzero returnF $
parseAsMathMLSym cmd `mplus` parseAsMath cmd `mplus` ils
where
- parseAsMath :: String -> Maybe Inlines
+ parseAsMath :: Text -> Maybe Inlines
parseAsMath cs = B.fromList <$> texMathToPandoc cs
- parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines)
+ parseAsInlineLaTeX :: PandocMonad m => Text -> m (Maybe Inlines)
parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs
- parseAsMathMLSym :: String -> Maybe Inlines
+ parseAsMathMLSym :: Text -> Maybe Inlines
parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
-- drop initial backslash and any trailing "{}"
- where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
+ where clean = T.dropWhileEnd (`elem` ("{}" :: String)) . T.drop 1
state :: ParserState
state = def{ stateOptions = def{ readerExtensions =
enableExtension Ext_raw_tex (readerExtensions def) } }
- texMathToPandoc :: String -> Maybe [Inline]
+ texMathToPandoc :: Text -> Maybe [Inline]
texMathToPandoc cs = maybeRight (readTeX cs) >>= writePandoc DisplayInline
maybeRight :: Either a b -> Maybe b
maybeRight = either (const Nothing) Just
-inlineLaTeXCommand :: PandocMonad m => OrgParser m String
+inlineLaTeXCommand :: PandocMonad m => OrgParser m Text
inlineLaTeXCommand = try $ do
rest <- getInput
st <- getState
@@ -823,21 +825,17 @@ inlineLaTeXCommand = try $ do
Right cs -> do
-- drop any trailing whitespace, those are not be part of the command as
-- far as org mode is concerned.
- let cmdNoSpc = dropWhileEnd isSpace cs
- let len = length cmdNoSpc
+ let cmdNoSpc = T.dropWhileEnd isSpace cs
+ let len = T.length cmdNoSpc
count len anyChar
return cmdNoSpc
_ -> mzero
--- Taken from Data.OldList.
-dropWhileEnd :: (a -> Bool) -> [a] -> [a]
-dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
-
exportSnippet :: PandocMonad m => OrgParser m (F Inlines)
exportSnippet = try $ do
string "@@"
- format <- many1Till (alphaNum <|> char '-') (char ':')
- snippet <- manyTill anyChar (try $ string "@@")
+ format <- many1TillChar (alphaNum <|> char '-') (char ':')
+ snippet <- manyTillChar anyChar (try $ string "@@")
returnF $ B.rawInline format snippet
macro :: PandocMonad m => OrgParser m (F Inlines)
@@ -845,7 +843,7 @@ macro = try $ do
recursionDepth <- orgStateMacroDepth <$> getState
guard $ recursionDepth < 15
string "{{{"
- name <- many alphaNum
+ name <- manyChar alphaNum
args <- ([] <$ string "}}}")
<|> char '(' *> argument `sepBy` char ',' <* eoa
expander <- lookupMacro name <$> getState
@@ -857,7 +855,7 @@ macro = try $ do
updateState $ \s -> s { orgStateMacroDepth = recursionDepth }
return res
where
- argument = many $ notFollowedBy eoa *> noneOf ","
+ argument = manyChar $ notFollowedBy eoa *> noneOf ","
eoa = string ")}}}"
smart :: PandocMonad m => OrgParser m (F Inlines)
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 0a388403e..811a5b974 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.Meta
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -30,11 +31,12 @@ import Text.Pandoc.Definition
import Text.Pandoc.Shared (safeRead)
import Control.Monad (mzero, void, when)
-import Data.Char (toLower)
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
import qualified Data.Map as M
import qualified Data.Set as Set
+import Data.Text (Text)
+import qualified Data.Text as T
import Network.HTTP (urlEncode)
-- | Returns the current meta, respecting export options.
@@ -47,7 +49,7 @@ metaExport = do
. (if exportWithEmail settings then id else removeMeta "email")
<$> orgStateMeta st
-removeMeta :: String -> Meta -> Meta
+removeMeta :: Text -> Meta -> Meta
removeMeta key meta' =
let metaMap = unMeta meta'
in Meta $ M.delete key metaMap
@@ -60,18 +62,18 @@ metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
declarationLine :: PandocMonad m => OrgParser m ()
declarationLine = try $ do
- key <- map toLower <$> metaKey
+ key <- T.toLower <$> metaKey
(key', value) <- metaValue key
let addMetaValue st =
st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st }
when (key' /= "results") $ updateState addMetaValue
-metaKey :: Monad m => OrgParser m String
-metaKey = map toLower <$> many1 (noneOf ": \n\r")
- <* char ':'
- <* skipSpaces
+metaKey :: Monad m => OrgParser m Text
+metaKey = T.toLower <$> many1Char (noneOf ": \n\r")
+ <* char ':'
+ <* skipSpaces
-metaValue :: PandocMonad m => String -> OrgParser m (String, F MetaValue)
+metaValue :: PandocMonad m => Text -> OrgParser m (Text, F MetaValue)
metaValue key =
let inclKey = "header-includes"
in case key of
@@ -88,7 +90,7 @@ metaValue key =
-- Org-mode expects class options to contain the surrounding brackets,
-- pandoc does not.
"latex_class_options" -> ("classoption",) <$>
- metaModifiedString (filter (`notElem` "[]"))
+ metaModifiedString (T.filter (`notElem` ("[]" :: String)))
"html_head" -> (inclKey,) <$>
accumulatingList inclKey (metaExportSnippet "html")
_ -> (key,) <$> metaString
@@ -98,25 +100,25 @@ metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue)
metaInlinesCommaSeparated = do
- itemStrs <- many1 (noneOf ",\n") `sepBy1` char ','
+ itemStrs <- many1Char (noneOf ",\n") `sepBy1` char ','
newline
- items <- mapM (parseFromString inlinesTillNewline . (++ "\n")) itemStrs
+ items <- mapM (parseFromString inlinesTillNewline . (<> "\n")) itemStrs
let toMetaInlines = MetaInlines . B.toList
return $ MetaList . map toMetaInlines <$> sequence items
metaString :: Monad m => OrgParser m (F MetaValue)
metaString = metaModifiedString id
-metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue)
+metaModifiedString :: Monad m => (Text -> Text) -> OrgParser m (F MetaValue)
metaModifiedString f = return . MetaString . f <$> anyLine
-- | Read an format specific meta definition
-metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue)
+metaExportSnippet :: Monad m => Text -> OrgParser m (F MetaValue)
metaExportSnippet format =
return . MetaInlines . B.toList . B.rawInline format <$> anyLine
-- | Accumulate the result of the @parser@ in a list under @key@.
-accumulatingList :: Monad m => String
+accumulatingList :: Monad m => Text
-> OrgParser m (F MetaValue)
-> OrgParser m (F MetaValue)
accumulatingList key p = do
@@ -147,33 +149,33 @@ optionLine = try $ do
"pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar
_ -> mzero
-addLinkFormat :: Monad m => String
- -> (String -> String)
+addLinkFormat :: Monad m => Text
+ -> (Text -> Text)
-> OrgParser m ()
addLinkFormat key formatter = updateState $ \s ->
let fs = orgStateLinkFormatters s
in s{ orgStateLinkFormatters = M.insert key formatter fs }
-parseLinkFormat :: Monad m => OrgParser m (String, String -> String)
+parseLinkFormat :: Monad m => OrgParser m (Text, Text -> Text)
parseLinkFormat = try $ do
- linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
+ linkType <- T.cons <$> letter <*> manyChar (alphaNum <|> oneOf "-_") <* skipSpaces
linkSubst <- parseFormat
return (linkType, linkSubst)
-- | An ad-hoc, single-argument-only implementation of a printf-style format
-- parser.
-parseFormat :: Monad m => OrgParser m (String -> String)
+parseFormat :: Monad m => OrgParser m (Text -> Text)
parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
where
-- inefficient, but who cares
- replacePlain = try $ (\x -> concat . flip intersperse x)
+ replacePlain = try $ (\x -> T.concat . flip intersperse x)
<$> sequence [tillSpecifier 's', rest]
- replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode)
+ replaceUrl = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack)
<$> sequence [tillSpecifier 'h', rest]
- justAppend = try $ (++) <$> rest
+ justAppend = try $ (<>) <$> rest
- rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
- tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
+ rest = manyTillChar anyChar (eof <|> () <$ oneOf "\n\r")
+ tillSpecifier c = manyTillChar (noneOf "\n\r") (try $ string ('%':c:""))
tagList :: Monad m => OrgParser m [Tag]
tagList = do
@@ -231,41 +233,41 @@ todoSequence = try $ do
(x:xs) -> return $ keywordsToSequence (reverse xs) [x]
where
- todoKeywords :: Monad m => OrgParser m [String]
+ todoKeywords :: Monad m => OrgParser m [Text]
todoKeywords = try $
- let keyword = many1 nonspaceChar <* skipSpaces
+ let keyword = many1Char nonspaceChar <* skipSpaces
endOfKeywords = todoDoneSep <|> void newline
in manyTill keyword (lookAhead endOfKeywords)
todoDoneSep :: Monad m => OrgParser m ()
todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1
- keywordsToSequence :: [String] -> [String] -> TodoSequence
+ keywordsToSequence :: [Text] -> [Text] -> TodoSequence
keywordsToSequence todo done =
let todoMarkers = map (TodoMarker Todo) todo
doneMarkers = map (TodoMarker Done) done
in todoMarkers ++ doneMarkers
-macroDefinition :: Monad m => OrgParser m (String, [String] -> String)
+macroDefinition :: Monad m => OrgParser m (Text, [Text] -> Text)
macroDefinition = try $ do
- macroName <- many1 nonspaceChar <* skipSpaces
+ macroName <- many1Char nonspaceChar <* skipSpaces
firstPart <- expansionPart
(elemOrder, parts) <- unzip <$> many ((,) <$> placeholder <*> expansionPart)
let expander = mconcat . alternate (firstPart:parts) . reorder elemOrder
return (macroName, expander)
where
placeholder :: Monad m => OrgParser m Int
- placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1 digit
+ placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1Char digit
- expansionPart :: Monad m => OrgParser m String
- expansionPart = try $ many (notFollowedBy placeholder *> noneOf "\n\r")
+ expansionPart :: Monad m => OrgParser m Text
+ expansionPart = try $ manyChar (notFollowedBy placeholder *> noneOf "\n\r")
alternate :: [a] -> [a] -> [a]
alternate [] ys = ys
alternate xs [] = xs
alternate (x:xs) (y:ys) = x : y : alternate xs ys
- reorder :: [Int] -> [String] -> [String]
+ reorder :: [Int] -> [Text] -> [Text]
reorder perm xs =
let element n = take 1 $ drop (n - 1) xs
in concatMap element perm
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index d6dde8b22..cf5583b76 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -1,6 +1,7 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.ParserState
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -63,16 +64,16 @@ import Text.Pandoc.Readers.LaTeX.Types (Macro)
type F = Future OrgParserState
-- | An inline note / footnote containing the note key and its (inline) value.
-type OrgNoteRecord = (String, F Blocks)
+type OrgNoteRecord = (Text, F Blocks)
-- | Table of footnotes
type OrgNoteTable = [OrgNoteRecord]
-- | Map of functions for link transformations. The map key is refers to the
-- link-type, the corresponding function transforms the given link string.
-type OrgLinkFormatters = M.Map String (String -> String)
+type OrgLinkFormatters = M.Map Text (Text -> Text)
-- | Macro expander function
-type MacroExpander = [String] -> String
+type MacroExpander = [Text] -> Text
-- | Tag
-newtype Tag = Tag { fromTag :: String }
+newtype Tag = Tag { fromTag :: Text }
deriving (Show, Eq, Ord)
-- | The states in which a todo item can be
@@ -82,7 +83,7 @@ data TodoState = Todo | Done
-- | A ToDo keyword like @TODO@ or @DONE@.
data TodoMarker = TodoMarker
{ todoMarkerState :: TodoState
- , todoMarkerName :: String
+ , todoMarkerName :: Text
}
deriving (Show, Eq)
@@ -91,7 +92,7 @@ type TodoSequence = [TodoMarker]
-- | Org-mode parser state
data OrgParserState = OrgParserState
- { orgStateAnchorIds :: [String]
+ { orgStateAnchorIds :: [Text]
, orgStateEmphasisCharStack :: [Char]
, orgStateEmphasisPreChars :: [Char] -- ^ Chars allowed to occur before
-- emphasis; spaces and newlines are
@@ -102,13 +103,13 @@ data OrgParserState = OrgParserState
, orgStateExcludeTags :: Set.Set Tag
, orgStateExcludeTagsChanged :: Bool
, orgStateExportSettings :: ExportSettings
- , orgStateIdentifiers :: Set.Set String
- , orgStateIncludeFiles :: [String]
+ , orgStateIdentifiers :: Set.Set Text
+ , orgStateIncludeFiles :: [Text]
, orgStateLastForbiddenCharPos :: Maybe SourcePos
, orgStateLastPreCharPos :: Maybe SourcePos
, orgStateLastStrPos :: Maybe SourcePos
, orgStateLinkFormatters :: OrgLinkFormatters
- , orgStateMacros :: M.Map String MacroExpander
+ , orgStateMacros :: M.Map Text MacroExpander
, orgStateMacroDepth :: Int
, orgStateMeta :: F Meta
, orgStateNotes' :: OrgNoteTable
@@ -212,10 +213,10 @@ activeTodoSequences st =
activeTodoMarkers :: OrgParserState -> TodoSequence
activeTodoMarkers = concat . activeTodoSequences
-lookupMacro :: String -> OrgParserState -> Maybe MacroExpander
+lookupMacro :: Text -> OrgParserState -> Maybe MacroExpander
lookupMacro macroName = M.lookup macroName . orgStateMacros
-registerMacro :: (String, MacroExpander) -> OrgParserState -> OrgParserState
+registerMacro :: (Text, MacroExpander) -> OrgParserState -> OrgParserState
registerMacro (name, expander) st =
let curMacros = orgStateMacros st
in st{ orgStateMacros = M.insert name expander curMacros }
@@ -236,7 +237,7 @@ data ArchivedTreesOption =
-- These settings can be changed via OPTIONS statements.
data ExportSettings = ExportSettings
{ exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees
- , exportDrawers :: Either [String] [String]
+ , exportDrawers :: Either [Text] [Text]
-- ^ Specify drawer names which should be exported. @Left@ names are
-- explicitly excluded from the resulting output while @Right@ means that
-- only the listed drawer names should be included.
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index 24aa0779d..718925120 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -32,7 +32,13 @@ module Text.Pandoc.Readers.Org.Parsing
, orgTagWordChar
-- * Re-exports from Text.Pandoc.Parser
, ParserContext (..)
+ , textStr
+ , countChar
+ , manyChar
+ , many1Char
+ , manyTillChar
, many1Till
+ , many1TillChar
, notFollowedBy'
, spaceChar
, nonspaceChar
@@ -98,6 +104,7 @@ module Text.Pandoc.Readers.Org.Parsing
) where
import Prelude
+import Data.Text (Text)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline,
@@ -108,14 +115,14 @@ import Control.Monad (guard)
import Control.Monad.Reader (ReaderT)
-- | The parser used to read org files.
-type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m)
+type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m)
--
-- Adaptions and specializations of parsing utilities
--
-- | Parse any line of text
-anyLine :: Monad m => OrgParser m String
+anyLine :: Monad m => OrgParser m Text
anyLine =
P.anyLine
<* updateLastPreCharPos
@@ -123,7 +130,7 @@ anyLine =
-- | Like @'Text.Pandoc.Parsing'@, but resets the position of the last character
-- allowed before emphasised text.
-parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a
+parseFromString :: Monad m => OrgParser m a -> Text -> OrgParser m a
parseFromString parser str' = do
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
result <- P.parseFromString parser str'
@@ -142,7 +149,7 @@ newline =
<* updateLastForbiddenCharPos
-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
-blanklines :: Monad m => OrgParser m [Char]
+blanklines :: Monad m => OrgParser m Text
blanklines =
P.blanklines
<* updateLastPreCharPos
@@ -192,21 +199,21 @@ updateLastPreCharPos = getPosition >>= \p ->
--
-- | Read the key of a plist style key-value list.
-orgArgKey :: Monad m => OrgParser m String
+orgArgKey :: Monad m => OrgParser m Text
orgArgKey = try $
skipSpaces *> char ':'
- *> many1 orgArgWordChar
+ *> many1Char orgArgWordChar
-- | Read the value of a plist style key-value list.
-orgArgWord :: Monad m => OrgParser m String
-orgArgWord = many1 orgArgWordChar
+orgArgWord :: Monad m => OrgParser m Text
+orgArgWord = many1Char orgArgWordChar
-- | Chars treated as part of a word in plists.
orgArgWordChar :: Monad m => OrgParser m Char
orgArgWordChar = alphaNum <|> oneOf "-_"
-orgTagWord :: Monad m => OrgParser m String
-orgTagWord = many1 orgTagWordChar
+orgTagWord :: Monad m => OrgParser m Text
+orgTagWord = many1Char orgTagWordChar
orgTagWordChar :: Monad m => OrgParser m Char
orgTagWordChar = alphaNum <|> oneOf "@%#_"
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 34f958373..be0a2068e 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -10,7 +10,7 @@
Utility functions used in other Pandoc Org modules.
-}
module Text.Pandoc.Readers.Org.Shared
- ( cleanLinkString
+ ( cleanLinkText
, isImageFilename
, originalLang
, translateLang
@@ -19,44 +19,44 @@ module Text.Pandoc.Readers.Org.Shared
import Prelude
import Data.Char (isAlphaNum)
-import Data.List (isPrefixOf)
+import Data.Text (Text)
+import qualified Data.Text as T
import System.FilePath (isValid, takeExtension)
-
+import Text.Pandoc.Shared (elemText)
-- | Check whether the given string looks like the path to of URL of an image.
-isImageFilename :: String -> Bool
-isImageFilename fp = hasImageExtension && (isValid fp || isKnownProtocolUri)
+isImageFilename :: Text -> Bool
+isImageFilename fp = hasImageExtension && (isValid (T.unpack fp) || isKnownProtocolUri)
where
- hasImageExtension = takeExtension fp `elem` imageExtensions
- isKnownProtocolUri = any (\x -> (x ++ "://") `isPrefixOf` fp) protocols
+ hasImageExtension = takeExtension (T.unpack fp) `elem` imageExtensions
+ isKnownProtocolUri = any (\x -> (x <> "://") `T.isPrefixOf` fp) protocols
imageExtensions = [ ".jpeg", ".jpg", ".png", ".gif", ".svg" ]
protocols = [ "file", "http", "https" ]
-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
-- the string does not appear to be a link.
-cleanLinkString :: String -> Maybe String
-cleanLinkString s =
- case s of
- '/':_ -> Just $ "file://" ++ s -- absolute path
- '.':'/':_ -> Just s -- relative path
- '.':'.':'/':_ -> Just s -- relative path
- -- Relative path or URL (file schema)
- 'f':'i':'l':'e':':':s' -> Just $ if "//" `isPrefixOf` s' then s else s'
- _ -> if isUrl s then Just s else Nothing
- where
- isUrl :: String -> Bool
- isUrl cs =
- let (scheme, path) = break (== ':') cs
- in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
- && not (null path)
+cleanLinkText :: Text -> Maybe Text
+cleanLinkText s
+ | Just _ <- T.stripPrefix "/" s = Just $ "file://" <> s -- absolute path
+ | Just _ <- T.stripPrefix "./" s = Just s -- relative path
+ | Just _ <- T.stripPrefix "../" s = Just s -- relative path
+ -- Relative path or URL (file schema)
+ | Just s' <- T.stripPrefix "file:" s = Just $ if "//" `T.isPrefixOf` s' then s else s'
+ | otherwise = if isUrl s then Just s else Nothing
+ where
+ isUrl :: Text -> Bool
+ isUrl cs =
+ let (scheme, path) = T.break (== ':') cs
+ in T.all (\c -> isAlphaNum c || c `elemText` ".-") scheme
+ && not (T.null path)
-- | Creates an key-value pair marking the original language name specified for
-- a piece of source code.
-- | Creates an key-value attributes marking the original language name
-- specified for a piece of source code.
-originalLang :: String -> [(String, String)]
+originalLang :: Text -> [(Text, Text)]
originalLang lang =
let transLang = translateLang lang
in if transLang == lang
@@ -66,7 +66,7 @@ originalLang lang =
-- | Translate from Org-mode's programming language identifiers to those used
-- by Pandoc. This is useful to allow for proper syntax highlighting in
-- Pandoc output.
-translateLang :: String -> String
+translateLang :: Text -> Text
translateLang cs =
case cs of
"C" -> "c"
@@ -79,5 +79,5 @@ translateLang cs =
"sqlite" -> "sql"
_ -> cs
-exportsCode :: [(String, String)] -> Bool
+exportsCode :: [(Text, Text)] -> Bool
exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports"