aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-05-09 09:51:19 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-05-09 09:51:19 -0700
commitc092a97132e9dc5c654e0e9d077014332fbdc4bc (patch)
treeb24e06d60092d73e1ba45851e16c732d5b0c105d
parent8afbd7e66499c4c9a24b225d3af01d353e0876ca (diff)
parent07694b30184bcf2ed0e2998016df394f47a1996f (diff)
downloadpandoc-c092a97132e9dc5c654e0e9d077014332fbdc4bc.tar.gz
Merge pull request #1289 from tarleb/code-block-headers
Org reader: Support code block headers, fix reading of block content
-rw-r--r--src/Text/Pandoc/Readers/Org.hs186
-rw-r--r--tests/Tests/Readers/Org.hs23
2 files changed, 140 insertions, 69 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index dba61dfe0..0f218d43f 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -50,7 +50,7 @@ import Data.Char (isAlphaNum, toLower)
import Data.Default
import Data.List (intersperse, isPrefixOf, isSuffixOf)
import qualified Data.Map as M
-import Data.Maybe (listToMaybe, fromMaybe, isJust)
+import Data.Maybe (fromMaybe, isJust)
import Data.Monoid (Monoid, mconcat, mempty, mappend)
import Network.HTTP (urlEncode)
@@ -162,7 +162,8 @@ popInlineCharStack = updateState $ \s ->
s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
surroundingEmphasisChar :: OrgParser [Char]
-surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
+surroundingEmphasisChar =
+ take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
startEmphasisNewlinesCounting :: Int -> OrgParser ()
startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
@@ -170,7 +171,7 @@ startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
decEmphasisNewlinesCount :: OrgParser ()
decEmphasisNewlinesCount = updateState $ \s ->
- s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
+ s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
newlinesCountWithinLimits :: OrgParser Bool
newlinesCountWithinLimits = do
@@ -275,7 +276,7 @@ parseBlockAttributes = do
where
attribute :: OrgParser (String, String)
attribute = try $ do
- key <- metaLineStart *> many1Till (noneOf "\n\r") (char ':')
+ key <- metaLineStart *> many1Till nonspaceChar (char ':')
val <- skipSpaces *> anyLine
return (map toLower key, val)
@@ -296,57 +297,74 @@ lookupInlinesAttr attr = try $ do
-- Org Blocks (#+BEGIN_... / #+END_...)
--
+type BlockProperties = (Int, String) -- (Indentation, Block-Type)
+
orgBlock :: OrgParser (F Blocks)
orgBlock = try $ do
- (indent, blockType, args) <- blockHeader
- content <- rawBlockContent indent blockType
- contentBlocks <- parseFromString parseBlocks (content ++ "\n")
- let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
- case blockType of
- "comment" -> return mempty
- "html" -> returnF $ B.rawBlock "html" content
- "latex" -> returnF $ B.rawBlock "latex" content
- "ascii" -> returnF $ B.rawBlock "ascii" content
- "example" -> returnF $ exampleCode content
- "quote" -> return $ B.blockQuote <$> contentBlocks
- "verse" -> parseVerse content
- "src" -> codeBlockWithAttr classArgs content
- _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks
+ blockProp@(_, blkType) <- blockHeaderStart
+ ($ blockProp) $
+ case blkType of
+ "comment" -> withRaw' (const mempty)
+ "html" -> withRaw' (return . (B.rawBlock blkType))
+ "latex" -> withRaw' (return . (B.rawBlock blkType))
+ "ascii" -> withRaw' (return . (B.rawBlock blkType))
+ "example" -> withRaw' (return . exampleCode)
+ "quote" -> withParsed (fmap B.blockQuote)
+ "verse" -> verseBlock
+ "src" -> codeBlock
+ _ -> withParsed (fmap $ divWithClass blkType)
+
+blockHeaderStart :: OrgParser (Int, String)
+blockHeaderStart = try $ (,) <$> indent <*> blockType
where
- parseVerse :: String -> OrgParser (F Blocks)
- parseVerse cs =
- fmap B.para . mconcat . intersperse (pure B.linebreak)
- <$> mapM (parseFromString parseInlines) (lines cs)
-
-blockHeader :: OrgParser (Int, String, [String])
-blockHeader = (,,) <$> blockIndent
- <*> blockType
- <*> (skipSpaces *> blockArgs)
- where blockIndent = length <$> many spaceChar
- blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter)
- blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline
-
-codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks)
-codeBlockWithAttr classArgs content = do
- identifier <- fromMaybe "" <$> lookupBlockAttribute "name"
- caption <- lookupInlinesAttr "caption"
- let codeBlck = B.codeBlockWith (identifier, classArgs, []) content
- return $ maybe (pure codeBlck) (labelDiv codeBlck) caption
+ indent = length <$> many spaceChar
+ blockType = map toLower <$> (stringAnyCase "#+begin_" *> many orgArgWordChar)
+
+withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
+
+withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp))
+
+ignHeaders :: OrgParser ()
+ignHeaders = (() <$ newline) <|> (() <$ anyLine)
+
+divWithClass :: String -> Blocks -> Blocks
+divWithClass cls = B.divWith ("", [cls], [])
+
+verseBlock :: BlockProperties -> OrgParser (F Blocks)
+verseBlock blkProp = try $ do
+ ignHeaders
+ content <- rawBlockContent blkProp
+ fmap B.para . mconcat . intersperse (pure B.linebreak)
+ <$> mapM (parseFromString parseInlines) (lines content)
+
+codeBlock :: BlockProperties -> OrgParser (F Blocks)
+codeBlock blkProp = do
+ skipSpaces
+ (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
+ id' <- fromMaybe "" <$> lookupBlockAttribute "name"
+ content <- rawBlockContent blkProp
+ let codeBlck = B.codeBlockWith ( id', classes, kv ) content
+ maybe (pure codeBlck) (labelDiv codeBlck) <$> lookupInlinesAttr "caption"
where
labelDiv blk value =
B.divWith nullAttr <$> (mappend <$> labelledBlock value
<*> pure blk)
labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
-rawBlockContent :: Int -> String -> OrgParser String
-rawBlockContent indent blockType =
+rawBlockContent :: BlockProperties -> OrgParser String
+rawBlockContent (indent, blockType) = try $
unlines . map commaEscaped <$> manyTill indentedLine blockEnder
where
- indentedLine = try $ choice [ blankline *> pure "\n"
- , indentWith indent *> anyLine
- ]
+ indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine)
blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
+parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
+parsedBlockContent blkProps = try $ do
+ raw <- rawBlockContent blkProps
+ parseFromString parseBlocks (raw ++ "\n")
+
-- indent by specified number of spaces (or equiv. tabs)
indentWith :: Int -> OrgParser String
indentWith num = do
@@ -356,6 +374,34 @@ indentWith num = do
else choice [ try (count num (char ' '))
, try (char '\t' >> count (num - tabStop) (char ' ')) ]
+type SwitchOption = (Char, Maybe String)
+
+orgArgWord :: OrgParser String
+orgArgWord = many1 orgArgWordChar
+
+-- | Parse code block arguments
+-- TODO: We currently don't handle switches.
+codeHeaderArgs :: OrgParser ([String], [(String, String)])
+codeHeaderArgs = try $ do
+ language <- skipSpaces *> orgArgWord
+ _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar))
+ parameters <- manyTill blockOption newline
+ let pandocLang = translateLang language
+ return $
+ if hasRundocParameters parameters
+ then ( [ pandocLang, rundocBlockClass ]
+ , map toRundocAttrib (("language", language) : parameters)
+ )
+ else ([ pandocLang ], parameters)
+ where hasRundocParameters = not . null
+
+switch :: OrgParser SwitchOption
+switch = try $ simpleSwitch <|> lineNumbersSwitch
+ where
+ simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter)
+ lineNumbersSwitch = (\ls -> ('l', Just ls)) <$>
+ (string "-l \"" *> many1Till nonspaceChar (char '"'))
+
translateLang :: String -> String
translateLang "C" = "c"
translateLang "C++" = "cpp"
@@ -367,6 +413,32 @@ translateLang "sh" = "bash"
translateLang "sqlite" = "sql"
translateLang cs = cs
+-- | Prefix used for Rundoc classes and arguments.
+rundocPrefix :: String
+rundocPrefix = "rundoc-"
+
+-- | The class-name used to mark rundoc blocks.
+rundocBlockClass :: String
+rundocBlockClass = rundocPrefix ++ "block"
+
+blockOption :: OrgParser (String, String)
+blockOption = try $ (,) <$> orgArgKey <*> orgArgValue
+
+orgArgKey :: OrgParser String
+orgArgKey = try $
+ skipSpaces *> char ':'
+ *> many1 orgArgWordChar
+
+orgArgValue :: OrgParser String
+orgArgValue = try $
+ skipSpaces *> many1 orgArgWordChar <* skipSpaces
+
+orgArgWordChar :: OrgParser Char
+orgArgWordChar = alphaNum <|> oneOf "-_"
+
+toRundocAttrib :: (String, String) -> (String, String)
+toRundocAttrib = first ("rundoc-" ++)
+
commaEscaped :: String -> String
commaEscaped (',':cs@('*':_)) = cs
commaEscaped (',':cs@('#':'+':_)) = cs
@@ -391,7 +463,7 @@ drawer = try $ do
drawerStart :: OrgParser String
drawerStart = try $
- skipSpaces *> drawerName <* skipSpaces <* newline
+ skipSpaces *> drawerName <* skipSpaces <* P.newline
where drawerName = try $ char ':' *> validDrawerName <* char ':'
validDrawerName = stringAnyCase "PROPERTIES"
<|> stringAnyCase "LOGBOOK"
@@ -401,7 +473,7 @@ drawerLine = try anyLine
drawerEnd :: OrgParser String
drawerEnd = try $
- skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
+ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* P.newline
--
@@ -412,7 +484,7 @@ drawerEnd = try $
figure :: OrgParser (F Blocks)
figure = try $ do
(cap, nam) <- nameAndCaption
- src <- skipSpaces *> selfTarget <* skipSpaces <* newline
+ src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline
guard (isImageFilename src)
return $ do
cap' <- cap
@@ -1002,30 +1074,6 @@ inlineCodeBlock = try $ do
returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
where enclosedByPair s e p = char s *> many1Till p (char e)
--- | The class-name used to mark rundoc blocks.
-rundocBlockClass :: String
-rundocBlockClass = "rundoc-block"
-
-blockOption :: OrgParser (String, String)
-blockOption = try $ (,) <$> orgArgKey <*> orgArgValue
-
-orgArgKey :: OrgParser String
-orgArgKey = try $
- skipSpaces *> char ':'
- *> many1 orgArgWordChar
- <* many1 spaceChar
-
-orgArgValue :: OrgParser String
-orgArgValue = try $
- skipSpaces *> many1 orgArgWordChar
- <* skipSpaces
-
-orgArgWordChar :: OrgParser Char
-orgArgWordChar = alphaNum <|> oneOf "-_"
-
-toRundocAttrib :: (String, String) -> (String, String)
-toRundocAttrib = first ("rundoc-" ++)
-
emph :: OrgParser (F Inlines)
emph = fmap B.emph <$> emphasisBetween '/'
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 949976aba..87b0d0c90 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -822,6 +822,20 @@ tests =
in mconcat [ para $ spcSep [ "Low", "German", "greeting" ]
, codeBlockWith attr' code'
]
+ , "Source block with rundoc/babel arguments" =:
+ unlines [ "#+BEGIN_SRC emacs-lisp :exports both"
+ , "(progn (message \"Hello, World!\")"
+ , " (+ 23 42))"
+ , "#+END_SRC" ] =?>
+ let classes = [ "commonlisp" -- as kate doesn't know emacs-lisp syntax
+ , "rundoc-block"
+ ]
+ params = [ ("rundoc-language", "emacs-lisp")
+ , ("rundoc-exports", "both")
+ ]
+ code' = unlines [ "(progn (message \"Hello, World!\")"
+ , " (+ 23 42))" ]
+ in codeBlockWith ("", classes, params) code'
, "Example block" =:
unlines [ "#+begin_example"
@@ -906,5 +920,14 @@ tests =
(unlines [ "fmap id = id"
, "fmap (p . q) = (fmap p) . (fmap q)"
])))
+
+ , "Convert blank lines in blocks to single newlines" =:
+ unlines [ "#+begin_html"
+ , ""
+ , "<span>boring</span>"
+ , ""
+ , "#+end_html"
+ ] =?>
+ rawBlock "html" "\n<span>boring</span>\n\n"
]
]