From 7760504bb26f215e7d0c57da843f1f1dcc8c1186 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Thu, 8 May 2014 17:01:58 +0200 Subject: Org reader: refactor #+BEGIN..#+END block parsing code --- src/Text/Pandoc/Readers/Org.hs | 122 +++++++++++++++++++++++++++-------------- 1 file changed, 80 insertions(+), 42 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index dba61dfe0..9df8ce0b3 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -50,7 +50,7 @@ import Data.Char (isAlphaNum, toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) import qualified Data.Map as M -import Data.Maybe (listToMaybe, fromMaybe, isJust) +import Data.Maybe (fromMaybe, isJust) import Data.Monoid (Monoid, mconcat, mempty, mappend) import Network.HTTP (urlEncode) @@ -162,7 +162,8 @@ popInlineCharStack = updateState $ \s -> s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s } surroundingEmphasisChar :: OrgParser [Char] -surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState +surroundingEmphasisChar = + take 1 . drop 1 . orgStateEmphasisCharStack <$> getState startEmphasisNewlinesCounting :: Int -> OrgParser () startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> @@ -170,7 +171,7 @@ startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> decEmphasisNewlinesCount :: OrgParser () decEmphasisNewlinesCount = updateState $ \s -> - s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } + s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } newlinesCountWithinLimits :: OrgParser Bool newlinesCountWithinLimits = do @@ -296,41 +297,60 @@ lookupInlinesAttr attr = try $ do -- Org Blocks (#+BEGIN_... / #+END_...) -- +type BlockProperties = (Int, String) -- (Indentation, Block-Type) + orgBlock :: OrgParser (F Blocks) orgBlock = try $ do - (indent, blockType, args) <- blockHeader - content <- rawBlockContent indent blockType - contentBlocks <- parseFromString parseBlocks (content ++ "\n") - let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] - case blockType of - "comment" -> return mempty - "html" -> returnF $ B.rawBlock "html" content - "latex" -> returnF $ B.rawBlock "latex" content - "ascii" -> returnF $ B.rawBlock "ascii" content - "example" -> returnF $ exampleCode content - "quote" -> return $ B.blockQuote <$> contentBlocks - "verse" -> parseVerse content - "src" -> codeBlockWithAttr classArgs content - _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks + blockProp@(_, blkType) <- blockHeaderStart + ($ blockProp) $ + case blkType of + "comment" -> withRaw' (const mempty) + "html" -> withRaw' (return . (B.rawBlock blkType)) + "latex" -> withRaw' (return . (B.rawBlock blkType)) + "ascii" -> withRaw' (return . (B.rawBlock blkType)) + "example" -> withRaw' (return . exampleCode) + "quote" -> withParsed (fmap B.blockQuote) + "verse" -> verseBlock + "src" -> codeBlock + _ -> withParsed (fmap $ divWithClass blkType) + +blockHeaderStart :: OrgParser (Int, String) +blockHeaderStart = try $ (,) <$> indent <*> blockType where - parseVerse :: String -> OrgParser (F Blocks) - parseVerse cs = - fmap B.para . mconcat . intersperse (pure B.linebreak) - <$> mapM (parseFromString parseInlines) (lines cs) - -blockHeader :: OrgParser (Int, String, [String]) -blockHeader = (,,) <$> blockIndent - <*> blockType - <*> (skipSpaces *> blockArgs) - where blockIndent = length <$> many spaceChar - blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter) - blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline - -codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks) -codeBlockWithAttr classArgs content = do - identifier <- fromMaybe "" <$> lookupBlockAttribute "name" - caption <- lookupInlinesAttr "caption" - let codeBlck = B.codeBlockWith (identifier, classArgs, []) content + indent = length <$> many spaceChar + blockType = map toLower <$> (stringAnyCase "#+begin_" *> many orgArgWordChar) + +withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) + +withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp)) + +ignHeaders :: OrgParser () +ignHeaders = (() <$ newline) <|> (() <$ anyLine) + +divWithClass :: String -> Blocks -> Blocks +divWithClass cls = B.divWith ("", [cls], []) + +verseBlock :: BlockProperties -> OrgParser (F Blocks) +verseBlock blkProp = try $ do + ignHeaders + content <- rawBlockContent blkProp + fmap B.para . mconcat . intersperse (pure B.linebreak) + <$> mapM (parseFromString parseInlines) (lines content) + +codeBlock :: BlockProperties -> OrgParser (F Blocks) +codeBlock blkProp = do + skipSpaces + language <- optionMaybe orgArgWord + (classes, kv) <- codeHeaderArgs + id' <- fromMaybe "" <$> lookupBlockAttribute "name" + caption <- lookupInlinesAttr "caption" + content <- rawBlockContent blkProp + let attr = ( id' + , maybe id (\l -> (l:)) language $ classes + , kv ) + let codeBlck = B.codeBlockWith attr content return $ maybe (pure codeBlck) (labelDiv codeBlck) caption where labelDiv blk value = @@ -338,14 +358,21 @@ codeBlockWithAttr classArgs content = do <*> pure blk) labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) -rawBlockContent :: Int -> String -> OrgParser String -rawBlockContent indent blockType = +rawBlockContent :: BlockProperties -> OrgParser String +rawBlockContent (indent, blockType) = try $ unlines . map commaEscaped <$> manyTill indentedLine blockEnder where - indentedLine = try $ choice [ blankline *> pure "\n" - , indentWith indent *> anyLine - ] - blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) + indentedLine = try $ + choice [ blankline *> pure "\n" + , indentWith indent *> anyLine + ] + blockEnder = try $ + indentWith indent *> stringAnyCase ("#+end_" <> blockType) + +parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) +parsedBlockContent blkProps = try $ do + raw <- rawBlockContent blkProps + parseFromString parseBlocks (raw ++ "\n") -- indent by specified number of spaces (or equiv. tabs) indentWith :: Int -> OrgParser String @@ -356,6 +383,13 @@ indentWith num = do else choice [ try (count num (char ' ')) , try (char '\t' >> count (num - tabStop) (char ' ')) ] +orgArgWord :: OrgParser String +orgArgWord = many1 orgArgWordChar + +codeHeaderArgs :: OrgParser ([String], [(String, String)]) +codeHeaderArgs = + (\x -> (x, [])) <$> manyTill (many nonspaceChar <* skipSpaces) newline + translateLang :: String -> String translateLang "C" = "c" translateLang "C++" = "cpp" @@ -1002,9 +1036,13 @@ inlineCodeBlock = try $ do returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode where enclosedByPair s e p = char s *> many1Till p (char e) +-- | Prefix used for Rundoc classes and arguments. +rundocPrefix :: String +rundocPrefix = "rundoc-" + -- | The class-name used to mark rundoc blocks. rundocBlockClass :: String -rundocBlockClass = "rundoc-block" +rundocBlockClass = rundocPrefix ++ "block" blockOption :: OrgParser (String, String) blockOption = try $ (,) <$> orgArgKey <*> orgArgValue -- cgit v1.2.3 From 757c4f68f3f3cab99db9499936e3ae4775ebbddf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 9 May 2014 18:07:37 +0200 Subject: Org reader: Support arguments for code blocks The general form of source block headers (`#+BEGIN_SRC
`) was not recognized by the reader. This patch adds support for the above form, adds header arguments to the block's key-value pairs and marks the block as a rundoc block if header arguments are present. This closes #1286. --- src/Text/Pandoc/Readers/Org.hs | 98 ++++++++++++++++++++++++------------------ tests/Tests/Readers/Org.hs | 14 ++++++ 2 files changed, 70 insertions(+), 42 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 9df8ce0b3..c05ac92d0 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -276,7 +276,7 @@ parseBlockAttributes = do where attribute :: OrgParser (String, String) attribute = try $ do - key <- metaLineStart *> many1Till (noneOf "\n\r") (char ':') + key <- metaLineStart *> many1Till nonspaceChar (char ':') val <- skipSpaces *> anyLine return (map toLower key, val) @@ -342,16 +342,11 @@ verseBlock blkProp = try $ do codeBlock :: BlockProperties -> OrgParser (F Blocks) codeBlock blkProp = do skipSpaces - language <- optionMaybe orgArgWord - (classes, kv) <- codeHeaderArgs + (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) id' <- fromMaybe "" <$> lookupBlockAttribute "name" - caption <- lookupInlinesAttr "caption" content <- rawBlockContent blkProp - let attr = ( id' - , maybe id (\l -> (l:)) language $ classes - , kv ) - let codeBlck = B.codeBlockWith attr content - return $ maybe (pure codeBlck) (labelDiv codeBlck) caption + let codeBlck = B.codeBlockWith ( id', classes, kv ) content + maybe (pure codeBlck) (labelDiv codeBlck) <$> lookupInlinesAttr "caption" where labelDiv blk value = B.divWith nullAttr <$> (mappend <$> labelledBlock value @@ -383,12 +378,33 @@ indentWith num = do else choice [ try (count num (char ' ')) , try (char '\t' >> count (num - tabStop) (char ' ')) ] +type SwitchOption = (Char, Maybe String) + orgArgWord :: OrgParser String orgArgWord = many1 orgArgWordChar +-- | Parse code block arguments +-- TODO: We currently don't handle switches. codeHeaderArgs :: OrgParser ([String], [(String, String)]) -codeHeaderArgs = - (\x -> (x, [])) <$> manyTill (many nonspaceChar <* skipSpaces) newline +codeHeaderArgs = try $ do + language <- skipSpaces *> orgArgWord + _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar)) + parameters <- manyTill blockOption newline + let pandocLang = translateLang language + return $ + if hasRundocParameters parameters + then ( [ pandocLang, rundocBlockClass ] + , map toRundocAttrib (("language", language) : parameters) + ) + else ([ pandocLang ], parameters) + where hasRundocParameters = not . null + +switch :: OrgParser SwitchOption +switch = try $ simpleSwitch <|> lineNumbersSwitch + where + simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter) + lineNumbersSwitch = (\ls -> ('l', Just ls)) <$> + (string "-l \"" *> many1Till nonspaceChar (char '"')) translateLang :: String -> String translateLang "C" = "c" @@ -401,6 +417,32 @@ translateLang "sh" = "bash" translateLang "sqlite" = "sql" translateLang cs = cs +-- | Prefix used for Rundoc classes and arguments. +rundocPrefix :: String +rundocPrefix = "rundoc-" + +-- | The class-name used to mark rundoc blocks. +rundocBlockClass :: String +rundocBlockClass = rundocPrefix ++ "block" + +blockOption :: OrgParser (String, String) +blockOption = try $ (,) <$> orgArgKey <*> orgArgValue + +orgArgKey :: OrgParser String +orgArgKey = try $ + skipSpaces *> char ':' + *> many1 orgArgWordChar + +orgArgValue :: OrgParser String +orgArgValue = try $ + skipSpaces *> many1 orgArgWordChar <* skipSpaces + +orgArgWordChar :: OrgParser Char +orgArgWordChar = alphaNum <|> oneOf "-_" + +toRundocAttrib :: (String, String) -> (String, String) +toRundocAttrib = first ("rundoc-" ++) + commaEscaped :: String -> String commaEscaped (',':cs@('*':_)) = cs commaEscaped (',':cs@('#':'+':_)) = cs @@ -425,7 +467,7 @@ drawer = try $ do drawerStart :: OrgParser String drawerStart = try $ - skipSpaces *> drawerName <* skipSpaces <* newline + skipSpaces *> drawerName <* skipSpaces <* P.newline where drawerName = try $ char ':' *> validDrawerName <* char ':' validDrawerName = stringAnyCase "PROPERTIES" <|> stringAnyCase "LOGBOOK" @@ -435,7 +477,7 @@ drawerLine = try anyLine drawerEnd :: OrgParser String drawerEnd = try $ - skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline + skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* P.newline -- @@ -446,7 +488,7 @@ drawerEnd = try $ figure :: OrgParser (F Blocks) figure = try $ do (cap, nam) <- nameAndCaption - src <- skipSpaces *> selfTarget <* skipSpaces <* newline + src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline guard (isImageFilename src) return $ do cap' <- cap @@ -1036,34 +1078,6 @@ inlineCodeBlock = try $ do returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode where enclosedByPair s e p = char s *> many1Till p (char e) --- | Prefix used for Rundoc classes and arguments. -rundocPrefix :: String -rundocPrefix = "rundoc-" - --- | The class-name used to mark rundoc blocks. -rundocBlockClass :: String -rundocBlockClass = rundocPrefix ++ "block" - -blockOption :: OrgParser (String, String) -blockOption = try $ (,) <$> orgArgKey <*> orgArgValue - -orgArgKey :: OrgParser String -orgArgKey = try $ - skipSpaces *> char ':' - *> many1 orgArgWordChar - <* many1 spaceChar - -orgArgValue :: OrgParser String -orgArgValue = try $ - skipSpaces *> many1 orgArgWordChar - <* skipSpaces - -orgArgWordChar :: OrgParser Char -orgArgWordChar = alphaNum <|> oneOf "-_" - -toRundocAttrib :: (String, String) -> (String, String) -toRundocAttrib = first ("rundoc-" ++) - emph :: OrgParser (F Inlines) emph = fmap B.emph <$> emphasisBetween '/' diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 949976aba..a78e8861f 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -822,6 +822,20 @@ tests = in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] , codeBlockWith attr' code' ] + , "Source block with rundoc/babel arguments" =: + unlines [ "#+BEGIN_SRC emacs-lisp :exports both" + , "(progn (message \"Hello, World!\")" + , " (+ 23 42))" + , "#+END_SRC" ] =?> + let classes = [ "commonlisp" -- as kate doesn't know emacs-lisp syntax + , "rundoc-block" + ] + params = [ ("rundoc-language", "emacs-lisp") + , ("rundoc-exports", "both") + ] + code' = unlines [ "(progn (message \"Hello, World!\")" + , " (+ 23 42))" ] + in codeBlockWith ("", classes, params) code' , "Example block" =: unlines [ "#+begin_example" -- cgit v1.2.3 From 07694b30184bcf2ed0e2998016df394f47a1996f Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Fri, 9 May 2014 18:23:23 +0200 Subject: Org reader: Fix parsing of blank lines within blocks Blank lines were parsed as two newlines instead of just one. Thanks to Xiao Hanyu (@xiaohanyu) for pointing this out. --- src/Text/Pandoc/Readers/Org.hs | 8 ++------ tests/Tests/Readers/Org.hs | 9 +++++++++ 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index c05ac92d0..0f218d43f 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -357,12 +357,8 @@ rawBlockContent :: BlockProperties -> OrgParser String rawBlockContent (indent, blockType) = try $ unlines . map commaEscaped <$> manyTill indentedLine blockEnder where - indentedLine = try $ - choice [ blankline *> pure "\n" - , indentWith indent *> anyLine - ] - blockEnder = try $ - indentWith indent *> stringAnyCase ("#+end_" <> blockType) + indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine) + blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType) parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) parsedBlockContent blkProps = try $ do diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index a78e8861f..87b0d0c90 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -920,5 +920,14 @@ tests = (unlines [ "fmap id = id" , "fmap (p . q) = (fmap p) . (fmap q)" ]))) + + , "Convert blank lines in blocks to single newlines" =: + unlines [ "#+begin_html" + , "" + , "boring" + , "" + , "#+end_html" + ] =?> + rawBlock "html" "\nboring\n\n" ] ] -- cgit v1.2.3