aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorAlbert Krewinkel <albert@zeitkraut.de>2016-05-29 19:54:41 +0200
committerAlbert Krewinkel <albert@zeitkraut.de>2016-05-29 21:08:36 +0200
commit4f84cf02c78cc20f7e52d9e82149fdfcc8282960 (patch)
treecb551777c936c90cca58d7059dcc21ffbb2e088f /src/Text
parentbc82123122d0bcd99f1cda07e87eca0b88fa8bbb (diff)
downloadpandoc-4f84cf02c78cc20f7e52d9e82149fdfcc8282960.tar.gz
Org reader: refactor BEGIN…END block parsing
- Reorder functions, grouping related functions together. - Demote simple functions to local functions if they are used just once. - Rename and document functions to increase code readability. - Fix handling of whitespace in blocks, allowing content to be indented less then the block header.
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs242
1 files changed, 131 insertions, 111 deletions
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index ef9c99cad..dba36fa4d 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -45,7 +45,7 @@ import Text.Pandoc.Shared ( compactify', compactify'DL )
import Control.Arrow ( first )
import Control.Monad ( foldM, guard, mzero )
-import Data.Char ( toLower, toUpper)
+import Data.Char ( isSpace, toLower, toUpper)
import Data.List ( foldl', intersperse, isPrefixOf )
import qualified Data.Map as M
import Data.Maybe ( fromMaybe, isNothing )
@@ -161,85 +161,100 @@ keyValues = try $
-- Org Blocks (#+BEGIN_... / #+END_...)
--
-type BlockProperties = (Int, String) -- (Indentation, Block-Type)
-
-updateIndent :: BlockProperties -> Int -> BlockProperties
-updateIndent (_, blkType) indent = (indent, blkType)
-
+-- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE.
orgBlock :: OrgParser (F Blocks)
orgBlock = try $ do
blockAttrs <- blockAttributes
- blockProp@(_, blkType) <- blockHeaderStart
- ($ blockProp) $
+ blkType <- blockHeaderStart
+ ($ blkType) $
case blkType of
- "comment" -> withRaw' (const mempty)
- "html" -> withRaw' (return . (B.rawBlock blkType))
- "latex" -> withRaw' (return . (B.rawBlock blkType))
- "ascii" -> withRaw' (return . (B.rawBlock blkType))
- "example" -> withRaw' (return . exampleCode)
- "quote" -> withParsed (fmap B.blockQuote)
+ "comment" -> rawBlockLines (const mempty)
+ "html" -> rawBlockLines (return . (B.rawBlock blkType))
+ "latex" -> rawBlockLines (return . (B.rawBlock blkType))
+ "ascii" -> rawBlockLines (return . (B.rawBlock blkType))
+ "example" -> rawBlockLines (return . exampleCode)
+ "quote" -> parseBlockLines (fmap B.blockQuote)
"verse" -> verseBlock
"src" -> codeBlock blockAttrs
- _ -> withParsed (fmap $ divWithClass blkType)
-
-blockHeaderStart :: OrgParser (Int, String)
-blockHeaderStart = try $ (,) <$> indentation <*> blockType
+ _ -> parseBlockLines (fmap $ B.divWith (mempty, [blkType], mempty))
where
- blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord)
+ blockHeaderStart :: OrgParser String
+ blockHeaderStart = try $ do
+ skipSpaces
+ blockType <- stringAnyCase "#+begin_" *> orgArgWord
+ return (map toLower blockType)
-indentation :: OrgParser Int
-indentation = try $ do
- tabStop <- getOption readerTabStop
- s <- many spaceChar
- return $ spaceLength tabStop s
+rawBlockLines :: (String -> F Blocks) -> String -> OrgParser (F Blocks)
+rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType))
-spaceLength :: Int -> String -> Int
-spaceLength tabStop s = (sum . map charLen) s
+parseBlockLines :: (F Blocks -> F Blocks) -> String -> OrgParser (F Blocks)
+parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent))
where
- charLen ' ' = 1
- charLen '\t' = tabStop
- charLen _ = 0
-
-withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
-withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
-
-withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
-withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp))
-
+ parsedBlockContent :: OrgParser (F Blocks)
+ parsedBlockContent = try $ do
+ raw <- rawBlockContent blockType
+ parseFromString blocks (raw ++ "\n")
+
+-- | Read the raw string content of a block
+rawBlockContent :: String -> OrgParser String
+rawBlockContent blockType = try $ do
+ blkLines <- manyTill rawLine blockEnder
+ tabLen <- getOption readerTabStop
+ return
+ . unlines
+ . stripIndent
+ . map (tabsToSpaces tabLen . commaEscaped)
+ $ blkLines
+ where
+ rawLine :: OrgParser String
+ rawLine = try $ ("" <$ blankline) <|> anyLine
+
+ blockEnder :: OrgParser ()
+ blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType)
+
+ stripIndent :: [String] -> [String]
+ stripIndent strs = map (drop (shortestIndent strs)) strs
+
+ shortestIndent :: [String] -> Int
+ shortestIndent = minimum
+ . map (length . takeWhile isSpace)
+ . filter (not . null)
+
+ tabsToSpaces :: Int -> String -> String
+ tabsToSpaces _ [] = []
+ tabsToSpaces tabLen cs'@(c:cs) =
+ case c of
+ ' ' -> ' ':tabsToSpaces tabLen cs
+ '\t' -> (take tabLen $ repeat ' ') ++ tabsToSpaces tabLen cs
+ _ -> cs'
+
+ commaEscaped :: String -> String
+ commaEscaped (',':cs@('*':_)) = cs
+ commaEscaped (',':cs@('#':'+':_)) = cs
+ commaEscaped (' ':cs) = ' ':commaEscaped cs
+ commaEscaped ('\t':cs) = '\t':commaEscaped cs
+ commaEscaped cs = cs
+
+-- | Read but ignore all remaining block headers.
ignHeaders :: OrgParser ()
ignHeaders = (() <$ newline) <|> (() <$ anyLine)
-divWithClass :: String -> Blocks -> Blocks
-divWithClass cls = B.divWith ("", [cls], [])
-
-verseBlock :: BlockProperties -> OrgParser (F Blocks)
-verseBlock blkProp = try $ do
+verseBlock :: String -> OrgParser (F Blocks)
+verseBlock blockType = try $ do
ignHeaders
- content <- rawBlockContent blkProp
+ content <- rawBlockContent blockType
fmap B.para . mconcat . intersperse (pure B.linebreak)
<$> mapM (parseFromString inlines) (map (++ "\n") . lines $ content)
-exportsCode :: [(String, String)] -> Bool
-exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
- || ("rundoc-exports", "results") `elem` attrs)
-
-exportsResults :: [(String, String)] -> Bool
-exportsResults attrs = ("rundoc-exports", "results") `elem` attrs
- || ("rundoc-exports", "both") `elem` attrs
-
-followingResultsBlock :: OrgParser (Maybe (F Blocks))
-followingResultsBlock =
- optionMaybe (try $ blanklines *> stringAnyCase "#+RESULTS:"
- *> blankline
- *> block)
-
-codeBlock :: BlockAttributes -> BlockProperties -> OrgParser (F Blocks)
-codeBlock blockAttrs blkProp = do
+-- | Read a code block and the associated results block if present. Which of
+-- boths blocks is included in the output is determined using the "exports"
+-- argument in the block header.
+codeBlock :: BlockAttributes -> String -> OrgParser (F Blocks)
+codeBlock blockAttrs blockType = do
skipSpaces
(classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
- leadingIndent <- lookAhead indentation
- content <- rawBlockContent (updateIndent blkProp leadingIndent)
- resultsContent <- followingResultsBlock
+ content <- rawBlockContent blockType
+ resultsContent <- trailingResultsBlock
let id' = fromMaybe mempty $ blockAttrName blockAttrs
let includeCode = exportsCode kv
let includeResults = exportsResults kv
@@ -248,36 +263,31 @@ codeBlock blockAttrs blkProp = do
(labelDiv codeBlck)
(blockAttrCaption blockAttrs)
let resultBlck = fromMaybe mempty resultsContent
- return $ (if includeCode then labelledBlck else mempty)
- <> (if includeResults then resultBlck else mempty)
+ return $
+ (if includeCode then labelledBlck else mempty) <>
+ (if includeResults then resultBlck else mempty)
where
+ labelDiv :: Blocks -> F Inlines -> F Blocks
labelDiv blk value =
- B.divWith nullAttr <$> (mappend <$> labelledBlock value
- <*> pure blk)
- labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
+ B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk)
-rawBlockContent :: BlockProperties -> OrgParser String
-rawBlockContent (indent, blockType) = try $
- unlines . map commaEscaped <$> manyTill indentedLine blockEnder
- where
- indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine)
- blockEnder = try $ skipSpaces *> stringAnyCase ("#+end_" <> blockType)
+ labelledBlock :: F Inlines -> F Blocks
+ labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
-parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
-parsedBlockContent blkProps = try $ do
- raw <- rawBlockContent blkProps
- parseFromString blocks (raw ++ "\n")
+exportsCode :: [(String, String)] -> Bool
+exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
+ || ("rundoc-exports", "results") `elem` attrs)
--- indent by specified number of spaces (or equiv. tabs)
-indentWith :: Int -> OrgParser String
-indentWith num = do
- tabStop <- getOption readerTabStop
- if num < tabStop
- then count num (char ' ')
- else choice [ try (count num (char ' '))
- , try (char '\t' >> count (num - tabStop) (char ' ')) ]
+exportsResults :: [(String, String)] -> Bool
+exportsResults attrs = ("rundoc-exports", "results") `elem` attrs
+ || ("rundoc-exports", "both") `elem` attrs
-type SwitchOption = (Char, Maybe String)
+trailingResultsBlock :: OrgParser (Maybe (F Blocks))
+trailingResultsBlock = optionMaybe . try $ do
+ blanklines
+ stringAnyCase "#+RESULTS:"
+ blankline
+ block
-- | Parse code block arguments
-- TODO: We currently don't handle switches.
@@ -297,8 +307,7 @@ codeHeaderArgs = try $ do
hasRundocParameters = not . null
toRundocAttrib = first ("rundoc-" ++)
-
-switch :: OrgParser SwitchOption
+switch :: OrgParser (Char, Maybe String)
switch = try $ simpleSwitch <|> lineNumbersSwitch
where
simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter)
@@ -334,24 +343,9 @@ orgParamValue :: OrgParser String
orgParamValue = try $
skipSpaces
*> notFollowedBy (char ':' )
- *> many1 (noneOf "\t\n\r ")
+ *> many1 nonspaceChar
<* skipSpaces
-commaEscaped :: String -> String
-commaEscaped (',':cs@('*':_)) = cs
-commaEscaped (',':cs@('#':'+':_)) = cs
-commaEscaped cs = cs
-
-example :: OrgParser (F Blocks)
-example = try $ do
- return . return . exampleCode =<< unlines <$> many1 exampleLine
-
-exampleCode :: String -> Blocks
-exampleCode = B.codeBlockWith ("", ["example"], [])
-
-exampleLine :: OrgParser String
-exampleLine = try $ exampleLineStart *> anyLine
-
horizontalRule :: OrgParser (F Blocks)
horizontalRule = return B.horizontalRule <$ try hline
@@ -444,18 +438,26 @@ figure = try $ do
selfTarget :: OrgParser String
selfTarget = try $ char '[' *> linkTarget <* char ']'
+--
+-- Examples
+--
+
+-- | Example code marked up by a leading colon.
+example :: OrgParser (F Blocks)
+example = try $ do
+ return . return . exampleCode =<< unlines <$> many1 exampleLine
+ where
+ exampleLine :: OrgParser String
+ exampleLine = try $ exampleLineStart *> anyLine
+
+exampleCode :: String -> Blocks
+exampleCode = B.codeBlockWith ("", ["example"], [])
+
--
-- Comments, Options and Metadata
--
-addLinkFormat :: String
- -> (String -> String)
- -> OrgParser ()
-addLinkFormat key formatter = updateState $ \s ->
- let fs = orgStateLinkFormatters s
- in s{ orgStateLinkFormatters = M.insert key formatter fs }
-
specialLine :: OrgParser (F Blocks)
specialLine = fmap return . try $ metaLine <|> commentLine
@@ -492,6 +494,14 @@ optionLine = try $ do
"options" -> () <$ sepBy spaces exportSetting
_ -> mzero
+addLinkFormat :: String
+ -> (String -> String)
+ -> OrgParser ()
+addLinkFormat key formatter = updateState $ \s ->
+ let fs = orgStateLinkFormatters s
+ in s{ orgStateLinkFormatters = M.insert key formatter fs }
+
+
--
-- Export Settings
--
@@ -884,7 +894,17 @@ listContinuation markerLength = try $
notFollowedBy' blankline
*> (mappend <$> (concat <$> many1 listLine)
<*> many blankline)
- where listLine = try $ indentWith markerLength *> anyLineNewline
+ where
+ listLine = try $ indentWith markerLength *> anyLineNewline
+
+ -- indent by specified number of spaces (or equiv. tabs)
+ indentWith :: Int -> OrgParser String
+ indentWith num = do
+ tabStop <- getOption readerTabStop
+ if num < tabStop
+ then count num (char ' ')
+ else choice [ try (count num (char ' '))
+ , try (char '\t' >> count (num - tabStop) (char ' ')) ]
-- | Parse any line, include the final newline in the output.
anyLineNewline :: OrgParser String