From 7760504bb26f215e7d0c57da843f1f1dcc8c1186 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 8 May 2014 17:01:58 +0200 Subject: Org reader: refactor #+BEGIN..#+END block parsing code --- src/Text/Pandoc/Readers/Org.hs | 122 +++++++++++++++++++++++++++-------------- 1 file changed, 80 insertions(+), 42 deletions(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index dba61dfe0..9df8ce0b3 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -50,7 +50,7 @@ import Data.Char (isAlphaNum, toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M -import Data.Maybe (listToMaybe, fromMaybe, isJust) +import Data.Maybe (fromMaybe, isJust) import Data.Monoid (Monoid, mconcat, mempty, mappend) import Network.HTTP (urlEncode) @@ -162,7 +162,8 @@ popInlineCharStack = updateState $ \s -> s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s } surroundingEmphasisChar :: OrgParser [Char] -surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState +surroundingEmphasisChar = + take 1 . drop 1 . orgStateEmphasisCharStack <$> getState startEmphasisNewlinesCounting :: Int -> OrgParser () startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> @@ -170,7 +171,7 @@ startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> decEmphasisNewlinesCount :: OrgParser () decEmphasisNewlinesCount = updateState $ \s -> - s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } + s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } newlinesCountWithinLimits :: OrgParser Bool newlinesCountWithinLimits = do @@ -296,41 +297,60 @@ lookupInlinesAttr attr = try $ do -- Org Blocks (#+BEGIN_... / #+END_...) -- +type BlockProperties = (Int, String) -- (Indentation, Block-Type) + orgBlock :: OrgParser (F Blocks) orgBlock = try $ do - (indent, blockType, args) <- blockHeader - content <- rawBlockContent indent blockType - contentBlocks <- parseFromString parseBlocks (content ++ "\n") - let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] - case blockType of - "comment" -> return mempty - "html" -> returnF $ B.rawBlock "html" content - "latex" -> returnF $ B.rawBlock "latex" content - "ascii" -> returnF $ B.rawBlock "ascii" content - "example" -> returnF $ exampleCode content - "quote" -> return $ B.blockQuote <$> contentBlocks - "verse" -> parseVerse content - "src" -> codeBlockWithAttr classArgs content - _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks + blockProp@(_, blkType) <- blockHeaderStart + ($ blockProp) $ + case blkType of + "comment" -> withRaw' (const mempty) + "html" -> withRaw' (return . (B.rawBlock blkType)) + "latex" -> withRaw' (return . (B.rawBlock blkType)) + "ascii" -> withRaw' (return . (B.rawBlock blkType)) + "example" -> withRaw' (return . exampleCode) + "quote" -> withParsed (fmap B.blockQuote) + "verse" -> verseBlock + "src" -> codeBlock + _ -> withParsed (fmap $ divWithClass blkType) + +blockHeaderStart :: OrgParser (Int, String) +blockHeaderStart = try $ (,) <$> indent <*> blockType where - parseVerse :: String -> OrgParser (F Blocks) - parseVerse cs = - fmap B.para . mconcat . intersperse (pure B.linebreak) - <$> mapM (parseFromString parseInlines) (lines cs) - -blockHeader :: OrgParser (Int, String, [String]) -blockHeader = (,,) <$> blockIndent - <*> blockType - <*> (skipSpaces *> blockArgs) - where blockIndent = length <$> many spaceChar - blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter) - blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline - -codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks) -codeBlockWithAttr classArgs content = do - identifier <- fromMaybe "" <$> lookupBlockAttribute "name" - caption <- lookupInlinesAttr "caption" - let codeBlck = B.codeBlockWith (identifier, classArgs, []) content + indent = length <$> many spaceChar + blockType = map toLower <$> (stringAnyCase "#+begin_" *> many orgArgWordChar) + +withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) + +withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp)) + +ignHeaders :: OrgParser () +ignHeaders = (() <$ newline) <|> (() <$ anyLine) + +divWithClass :: String -> Blocks -> Blocks +divWithClass cls = B.divWith ("", [cls], []) + +verseBlock :: BlockProperties -> OrgParser (F Blocks) +verseBlock blkProp = try $ do + ignHeaders + content <- rawBlockContent blkProp + fmap B.para . mconcat . intersperse (pure B.linebreak) + <$> mapM (parseFromString parseInlines) (lines content) + +codeBlock :: BlockProperties -> OrgParser (F Blocks) +codeBlock blkProp = do + skipSpaces + language <- optionMaybe orgArgWord + (classes, kv) <- codeHeaderArgs + id' <- fromMaybe "" <$> lookupBlockAttribute "name" + caption <- lookupInlinesAttr "caption" + content <- rawBlockContent blkProp + let attr = ( id' + , maybe id (\l -> (l:)) language $ classes + , kv ) + let codeBlck = B.codeBlockWith attr content return $ maybe (pure codeBlck) (labelDiv codeBlck) caption where labelDiv blk value = @@ -338,14 +358,21 @@ codeBlockWithAttr classArgs content = do <*> pure blk) labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) -rawBlockContent :: Int -> String -> OrgParser String -rawBlockContent indent blockType = +rawBlockContent :: BlockProperties -> OrgParser String +rawBlockContent (indent, blockType) = try $ unlines . map commaEscaped <$> manyTill indentedLine blockEnder where - indentedLine = try $ choice [ blankline *> pure "\n" - , indentWith indent *> anyLine - ] - blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) + indentedLine = try $ + choice [ blankline *> pure "\n" + , indentWith indent *> anyLine + ] + blockEnder = try $ + indentWith indent *> stringAnyCase ("#+end_" <> blockType) + +parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) +parsedBlockContent blkProps = try $ do + raw <- rawBlockContent blkProps + parseFromString parseBlocks (raw ++ "\n") -- indent by specified number of spaces (or equiv. tabs) indentWith :: Int -> OrgParser String @@ -356,6 +383,13 @@ indentWith num = do else choice [ try (count num (char ' ')) , try (char '\t' >> count (num - tabStop) (char ' ')) ] +orgArgWord :: OrgParser String +orgArgWord = many1 orgArgWordChar + +codeHeaderArgs :: OrgParser ([String], [(String, String)]) +codeHeaderArgs = + (\x -> (x, [])) <$> manyTill (many nonspaceChar <* skipSpaces) newline + translateLang :: String -> String translateLang "C" = "c" translateLang "C++" = "cpp" @@ -1002,9 +1036,13 @@ inlineCodeBlock = try $ do returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode where enclosedByPair s e p = char s *> many1Till p (char e) +-- | Prefix used for Rundoc classes and arguments. +rundocPrefix :: String +rundocPrefix = "rundoc-" + -- | The class-name used to mark rundoc blocks. rundocBlockClass :: String -rundocBlockClass = "rundoc-block" +rundocBlockClass = rundocPrefix ++ "block" blockOption :: OrgParser (String, String) blockOption = try $ (,) <$> orgArgKey <*> orgArgValue -- cgit v1.2.3