diff options
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Blocks.hs | 263 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 7 | ||||
-rw-r--r-- | tests/Tests/Readers/Org.hs | 16 |
3 files changed, 165 insertions, 121 deletions
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index b374acfe2..36645a356 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -45,7 +45,7 @@ import Text.Pandoc.Shared ( compactify', compactify'DL ) import Control.Arrow ( first ) import Control.Monad ( foldM, guard, mzero ) -import Data.Char ( toLower, toUpper) +import Data.Char ( isSpace, toLower, toUpper) import Data.List ( foldl', intersperse, isPrefixOf ) import qualified Data.Map as M import Data.Maybe ( fromMaybe, isNothing ) @@ -116,7 +116,7 @@ blockAttributes = try $ do let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv let name = lookup "NAME" kv caption' <- maybe (return Nothing) - (fmap Just . parseFromString parseInlines) + (fmap Just . parseFromString inlines) caption kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs return $ BlockAttributes @@ -161,85 +161,109 @@ keyValues = try $ -- Org Blocks (#+BEGIN_... / #+END_...) -- -type BlockProperties = (Int, String) -- (Indentation, Block-Type) - -updateIndent :: BlockProperties -> Int -> BlockProperties -updateIndent (_, blkType) indent = (indent, blkType) - +-- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE. orgBlock :: OrgParser (F Blocks) orgBlock = try $ do blockAttrs <- blockAttributes - blockProp@(_, blkType) <- blockHeaderStart - ($ blockProp) $ + blkType <- blockHeaderStart + ($ blkType) $ case blkType of - "comment" -> withRaw' (const mempty) - "html" -> withRaw' (return . (B.rawBlock blkType)) - "latex" -> withRaw' (return . (B.rawBlock blkType)) - "ascii" -> withRaw' (return . (B.rawBlock blkType)) - "example" -> withRaw' (return . exampleCode) - "quote" -> withParsed (fmap B.blockQuote) + "export" -> exportBlock + "comment" -> rawBlockLines (const mempty) + "html" -> rawBlockLines (return . (B.rawBlock blkType)) + "latex" -> rawBlockLines (return . (B.rawBlock blkType)) + "ascii" -> rawBlockLines (return . (B.rawBlock blkType)) + "example" -> rawBlockLines (return . exampleCode) + "quote" -> parseBlockLines (fmap B.blockQuote) "verse" -> verseBlock "src" -> codeBlock blockAttrs - _ -> withParsed (fmap $ divWithClass blkType) - -blockHeaderStart :: OrgParser (Int, String) -blockHeaderStart = try $ (,) <$> indentation <*> blockType + _ -> parseBlockLines (fmap $ B.divWith (mempty, [blkType], mempty)) where - blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord) + blockHeaderStart :: OrgParser String + blockHeaderStart = try $ do + skipSpaces + blockType <- stringAnyCase "#+begin_" *> orgArgWord + return (map toLower blockType) -indentation :: OrgParser Int -indentation = try $ do - tabStop <- getOption readerTabStop - s <- many spaceChar - return $ spaceLength tabStop s +rawBlockLines :: (String -> F Blocks) -> String -> OrgParser (F Blocks) +rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType)) -spaceLength :: Int -> String -> Int -spaceLength tabStop s = (sum . map charLen) s +parseBlockLines :: (F Blocks -> F Blocks) -> String -> OrgParser (F Blocks) +parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent)) where - charLen ' ' = 1 - charLen '\t' = tabStop - charLen _ = 0 - -withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) -withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) - -withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) -withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp)) - + parsedBlockContent :: OrgParser (F Blocks) + parsedBlockContent = try $ do + raw <- rawBlockContent blockType + parseFromString blocks (raw ++ "\n") + +-- | Read the raw string content of a block +rawBlockContent :: String -> OrgParser String +rawBlockContent blockType = try $ do + blkLines <- manyTill rawLine blockEnder + tabLen <- getOption readerTabStop + return + . unlines + . stripIndent + . map (tabsToSpaces tabLen . commaEscaped) + $ blkLines + where + rawLine :: OrgParser String + rawLine = try $ ("" <$ blankline) <|> anyLine + + blockEnder :: OrgParser () + blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType) + + stripIndent :: [String] -> [String] + stripIndent strs = map (drop (shortestIndent strs)) strs + + shortestIndent :: [String] -> Int + shortestIndent = minimum + . map (length . takeWhile isSpace) + . filter (not . null) + + tabsToSpaces :: Int -> String -> String + tabsToSpaces _ [] = [] + tabsToSpaces tabLen cs'@(c:cs) = + case c of + ' ' -> ' ':tabsToSpaces tabLen cs + '\t' -> (take tabLen $ repeat ' ') ++ tabsToSpaces tabLen cs + _ -> cs' + + commaEscaped :: String -> String + commaEscaped (',':cs@('*':_)) = cs + commaEscaped (',':cs@('#':'+':_)) = cs + commaEscaped (' ':cs) = ' ':commaEscaped cs + commaEscaped ('\t':cs) = '\t':commaEscaped cs + commaEscaped cs = cs + +-- | Read but ignore all remaining block headers. ignHeaders :: OrgParser () ignHeaders = (() <$ newline) <|> (() <$ anyLine) -divWithClass :: String -> Blocks -> Blocks -divWithClass cls = B.divWith ("", [cls], []) +-- | Read a block containing code intended for export in specific backends +-- only. +exportBlock :: String -> OrgParser (F Blocks) +exportBlock blockType = try $ do + exportType <- skipSpaces *> orgArgWord <* ignHeaders + contents <- rawBlockContent blockType + returnF (B.rawBlock (map toLower exportType) contents) -verseBlock :: BlockProperties -> OrgParser (F Blocks) -verseBlock blkProp = try $ do +verseBlock :: String -> OrgParser (F Blocks) +verseBlock blockType = try $ do ignHeaders - content <- rawBlockContent blkProp + content <- rawBlockContent blockType fmap B.para . mconcat . intersperse (pure B.linebreak) - <$> mapM (parseFromString parseInlines) (map (++ "\n") . lines $ content) - -exportsCode :: [(String, String)] -> Bool -exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs - || ("rundoc-exports", "results") `elem` attrs) - -exportsResults :: [(String, String)] -> Bool -exportsResults attrs = ("rundoc-exports", "results") `elem` attrs - || ("rundoc-exports", "both") `elem` attrs - -followingResultsBlock :: OrgParser (Maybe (F Blocks)) -followingResultsBlock = - optionMaybe (try $ blanklines *> stringAnyCase "#+RESULTS:" - *> blankline - *> block) + <$> mapM (parseFromString inlines) (map (++ "\n") . lines $ content) -codeBlock :: BlockAttributes -> BlockProperties -> OrgParser (F Blocks) -codeBlock blockAttrs blkProp = do +-- | Read a code block and the associated results block if present. Which of +-- boths blocks is included in the output is determined using the "exports" +-- argument in the block header. +codeBlock :: BlockAttributes -> String -> OrgParser (F Blocks) +codeBlock blockAttrs blockType = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) - leadingIndent <- lookAhead indentation - content <- rawBlockContent (updateIndent blkProp leadingIndent) - resultsContent <- followingResultsBlock + content <- rawBlockContent blockType + resultsContent <- trailingResultsBlock let id' = fromMaybe mempty $ blockAttrName blockAttrs let includeCode = exportsCode kv let includeResults = exportsResults kv @@ -248,36 +272,31 @@ codeBlock blockAttrs blkProp = do (labelDiv codeBlck) (blockAttrCaption blockAttrs) let resultBlck = fromMaybe mempty resultsContent - return $ (if includeCode then labelledBlck else mempty) - <> (if includeResults then resultBlck else mempty) + return $ + (if includeCode then labelledBlck else mempty) <> + (if includeResults then resultBlck else mempty) where + labelDiv :: Blocks -> F Inlines -> F Blocks labelDiv blk value = - B.divWith nullAttr <$> (mappend <$> labelledBlock value - <*> pure blk) - labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) + B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk) -rawBlockContent :: BlockProperties -> OrgParser String -rawBlockContent (indent, blockType) = try $ - unlines . map commaEscaped <$> manyTill indentedLine blockEnder - where - indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine) - blockEnder = try $ skipSpaces *> stringAnyCase ("#+end_" <> blockType) + labelledBlock :: F Inlines -> F Blocks + labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) -parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) -parsedBlockContent blkProps = try $ do - raw <- rawBlockContent blkProps - parseFromString blocks (raw ++ "\n") +exportsCode :: [(String, String)] -> Bool +exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs + || ("rundoc-exports", "results") `elem` attrs) --- indent by specified number of spaces (or equiv. tabs) -indentWith :: Int -> OrgParser String -indentWith num = do - tabStop <- getOption readerTabStop - if num < tabStop - then count num (char ' ') - else choice [ try (count num (char ' ')) - , try (char '\t' >> count (num - tabStop) (char ' ')) ] +exportsResults :: [(String, String)] -> Bool +exportsResults attrs = ("rundoc-exports", "results") `elem` attrs + || ("rundoc-exports", "both") `elem` attrs -type SwitchOption = (Char, Maybe String) +trailingResultsBlock :: OrgParser (Maybe (F Blocks)) +trailingResultsBlock = optionMaybe . try $ do + blanklines + stringAnyCase "#+RESULTS:" + blankline + block -- | Parse code block arguments -- TODO: We currently don't handle switches. @@ -297,8 +316,7 @@ codeHeaderArgs = try $ do hasRundocParameters = not . null toRundocAttrib = first ("rundoc-" ++) - -switch :: OrgParser SwitchOption +switch :: OrgParser (Char, Maybe String) switch = try $ simpleSwitch <|> lineNumbersSwitch where simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter) @@ -334,24 +352,9 @@ orgParamValue :: OrgParser String orgParamValue = try $ skipSpaces *> notFollowedBy (char ':' ) - *> many1 (noneOf "\t\n\r ") + *> many1 nonspaceChar <* skipSpaces -commaEscaped :: String -> String -commaEscaped (',':cs@('*':_)) = cs -commaEscaped (',':cs@('#':'+':_)) = cs -commaEscaped cs = cs - -example :: OrgParser (F Blocks) -example = try $ do - return . return . exampleCode =<< unlines <$> many1 exampleLine - -exampleCode :: String -> Blocks -exampleCode = B.codeBlockWith ("", ["example"], []) - -exampleLine :: OrgParser String -exampleLine = try $ exampleLineStart *> anyLine - horizontalRule :: OrgParser (F Blocks) horizontalRule = return B.horizontalRule <$ try hline @@ -444,18 +447,26 @@ figure = try $ do selfTarget :: OrgParser String selfTarget = try $ char '[' *> linkTarget <* char ']' +-- +-- Examples +-- + +-- | Example code marked up by a leading colon. +example :: OrgParser (F Blocks) +example = try $ do + return . return . exampleCode =<< unlines <$> many1 exampleLine + where + exampleLine :: OrgParser String + exampleLine = try $ exampleLineStart *> anyLine + +exampleCode :: String -> Blocks +exampleCode = B.codeBlockWith ("", ["example"], []) + -- -- Comments, Options and Metadata -- -addLinkFormat :: String - -> (String -> String) - -> OrgParser () -addLinkFormat key formatter = updateState $ \s -> - let fs = orgStateLinkFormatters s - in s{ orgStateLinkFormatters = M.insert key formatter fs } - specialLine :: OrgParser (F Blocks) specialLine = fmap return . try $ metaLine <|> commentLine @@ -492,6 +503,14 @@ optionLine = try $ do "options" -> () <$ sepBy spaces exportSetting _ -> mzero +addLinkFormat :: String + -> (String -> String) + -> OrgParser () +addLinkFormat key formatter = updateState $ \s -> + let fs = orgStateLinkFormatters s + in s{ orgStateLinkFormatters = M.insert key formatter fs } + + -- -- Export Settings -- @@ -618,10 +637,10 @@ header = try $ do title <- manyTill inline (lookAhead $ optional headerTags <* newline) tags <- option [] headerTags newline + let text = tagTitle title tags propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer) - inlines <- runF (tagTitle title tags) <$> getState - attr <- registerHeader propAttr inlines - return $ pure (B.headerWith attr level inlines) + attr <- registerHeader propAttr (runF text def) + return (B.headerWith attr level <$> text) where tagTitle :: [F Inlines] -> [String] -> F Inlines tagTitle title tags = trimInlinesF . mconcat $ title <> map tagToInlineF tags @@ -799,7 +818,7 @@ noteBlock = try $ do -- Paragraphs or Plain text paraOrPlain :: OrgParser (F Blocks) paraOrPlain = try $ do - ils <- parseInlines + ils <- inlines nl <- option False (newline *> return True) -- Read block as paragraph, except if we are in a list context and the block -- is directly followed by a list item, in which case the block is read as @@ -858,7 +877,7 @@ definitionListItem parseMarkerGetLength = try $ do line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) cont <- concat <$> many (listContinuation markerLength) - term' <- parseFromString parseInlines term + term' <- parseFromString inlines term contents' <- parseFromString blocks $ line1 ++ blank ++ cont return $ (,) <$> term' <*> fmap (:[]) contents' where @@ -884,7 +903,17 @@ listContinuation markerLength = try $ notFollowedBy' blankline *> (mappend <$> (concat <$> many1 listLine) <*> many blankline) - where listLine = try $ indentWith markerLength *> anyLineNewline + where + listLine = try $ indentWith markerLength *> anyLineNewline + + -- indent by specified number of spaces (or equiv. tabs) + indentWith :: Int -> OrgParser String + indentWith num = do + tabStop <- getOption readerTabStop + if num < tabStop + then count num (char ' ') + else choice [ try (count num (char ' ')) + , try (char '\t' >> count (num - tabStop) (char ' ')) ] -- | Parse any line, include the final newline in the output. anyLineNewline :: OrgParser String diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 0c3840979..a122c334a 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -28,8 +28,8 @@ Parsers for Org-mode inline elements. -} module Text.Pandoc.Readers.Org.Inlines ( inline + , inlines , addToNotesTable - , parseInlines , isImageFilename , linkTarget ) where @@ -145,8 +145,9 @@ inline = ] <* (guard =<< newlinesCountWithinLimits) <?> "inline" -parseInlines :: OrgParser (F Inlines) -parseInlines = trimInlinesF . mconcat <$> many1 inline +-- | Read the rest of the input as inlines. +inlines :: OrgParser (F Inlines) +inlines = trimInlinesF . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: specialChars :: [Char] diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 780053059..345ed462a 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -1293,7 +1293,7 @@ tests = ] ] - , "Verse block with newlines" =: + , "Verse block with blank lines" =: unlines [ "#+BEGIN_VERSE" , "foo" , "" @@ -1302,6 +1302,20 @@ tests = ] =?> para ("foo" <> linebreak <> linebreak <> "bar") + , "Raw block LaTeX" =: + unlines [ "#+BEGIN_LaTeX" + , "The category $\\cat{Set}$ is adhesive." + , "#+END_LaTeX" + ] =?> + rawBlock "latex" "The category $\\cat{Set}$ is adhesive.\n" + + , "Export block HTML" =: + unlines [ "#+BEGIN_export html" + , "<samp>Hello, World!</samp>" + , "#+END_export" + ] =?> + rawBlock "html" "<samp>Hello, World!</samp>\n" + , "LaTeX fragment" =: unlines [ "\\begin{equation}" , "X_i = \\begin{cases}" |