diff options
| author | John MacFarlane <jgm@berkeley.edu> | 2016-05-31 11:16:08 -0700 | 
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2016-05-31 11:16:08 -0700 | 
| commit | 669ecbd4abc0061d83537511ebeae10713a50047 (patch) | |
| tree | c375f1f58f21d29d6f153043d32dab36a925f477 /src | |
| parent | 561afac0bc004e324358782c30a18eae0cd3cc4c (diff) | |
| parent | c17c62a2c74bbb6e36e12feea5aa6ba8679a023a (diff) | |
| download | pandoc-669ecbd4abc0061d83537511ebeae10713a50047.tar.gz | |
Merge pull request #2954 from tarleb/org-export-blocks
Org export blocks
Diffstat (limited to 'src')
| -rw-r--r-- | src/Text/Pandoc/Readers/Org/Blocks.hs | 263 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 7 | 
2 files changed, 150 insertions, 120 deletions
| diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index b374acfe2..36645a356 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -45,7 +45,7 @@ import           Text.Pandoc.Shared ( compactify', compactify'DL )  import           Control.Arrow ( first )  import           Control.Monad ( foldM, guard, mzero ) -import           Data.Char ( toLower, toUpper) +import           Data.Char ( isSpace, toLower, toUpper)  import           Data.List ( foldl', intersperse, isPrefixOf )  import qualified Data.Map as M  import           Data.Maybe ( fromMaybe, isNothing ) @@ -116,7 +116,7 @@ blockAttributes = try $ do    let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv    let name    = lookup "NAME" kv    caption' <- maybe (return Nothing) -                    (fmap Just . parseFromString parseInlines) +                    (fmap Just . parseFromString inlines)                      caption    kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs    return $ BlockAttributes @@ -161,85 +161,109 @@ keyValues = try $  -- Org Blocks (#+BEGIN_... / #+END_...)  -- -type BlockProperties = (Int, String)  -- (Indentation, Block-Type) - -updateIndent :: BlockProperties -> Int -> BlockProperties -updateIndent (_, blkType) indent = (indent, blkType) - +-- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE.  orgBlock :: OrgParser (F Blocks)  orgBlock = try $ do    blockAttrs <- blockAttributes -  blockProp@(_, blkType) <- blockHeaderStart -  ($ blockProp) $ +  blkType <- blockHeaderStart +  ($ blkType) $      case blkType of -      "comment" -> withRaw'   (const mempty) -      "html"    -> withRaw'   (return . (B.rawBlock blkType)) -      "latex"   -> withRaw'   (return . (B.rawBlock blkType)) -      "ascii"   -> withRaw'   (return . (B.rawBlock blkType)) -      "example" -> withRaw'   (return . exampleCode) -      "quote"   -> withParsed (fmap B.blockQuote) +      "export"  -> exportBlock +      "comment" -> rawBlockLines (const mempty) +      "html"    -> rawBlockLines (return . (B.rawBlock blkType)) +      "latex"   -> rawBlockLines (return . (B.rawBlock blkType)) +      "ascii"   -> rawBlockLines (return . (B.rawBlock blkType)) +      "example" -> rawBlockLines (return . exampleCode) +      "quote"   -> parseBlockLines (fmap B.blockQuote)        "verse"   -> verseBlock        "src"     -> codeBlock blockAttrs -      _         -> withParsed (fmap $ divWithClass blkType) - -blockHeaderStart :: OrgParser (Int, String) -blockHeaderStart = try $ (,) <$> indentation <*> blockType +      _         -> parseBlockLines (fmap $ B.divWith (mempty, [blkType], mempty))   where -  blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord) +   blockHeaderStart :: OrgParser String +   blockHeaderStart = try $ do +     skipSpaces +     blockType <- stringAnyCase "#+begin_" *> orgArgWord +     return (map toLower blockType) -indentation :: OrgParser Int -indentation = try $ do -  tabStop  <- getOption readerTabStop -  s        <- many spaceChar -  return $ spaceLength tabStop s +rawBlockLines :: (String   -> F Blocks) -> String -> OrgParser (F Blocks) +rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType)) -spaceLength :: Int -> String -> Int -spaceLength tabStop s = (sum . map charLen) s +parseBlockLines :: (F Blocks -> F Blocks) -> String -> OrgParser (F Blocks) +parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent))   where -  charLen ' '  = 1 -  charLen '\t' = tabStop -  charLen _    = 0 - -withRaw'   :: (String   -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) -withRaw'   f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) - -withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) -withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp)) - +   parsedBlockContent :: OrgParser (F Blocks) +   parsedBlockContent = try $ do +     raw <- rawBlockContent blockType +     parseFromString blocks (raw ++ "\n") + +-- | Read the raw string content of a block +rawBlockContent :: String -> OrgParser String +rawBlockContent blockType = try $ do +  blkLines <- manyTill rawLine blockEnder +  tabLen <- getOption readerTabStop +  return +    . unlines +    . stripIndent +    . map (tabsToSpaces tabLen . commaEscaped) +    $ blkLines + where +   rawLine :: OrgParser String +   rawLine = try $ ("" <$ blankline) <|> anyLine + +   blockEnder :: OrgParser () +   blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType) + +   stripIndent :: [String] -> [String] +   stripIndent strs = map (drop (shortestIndent strs)) strs + +   shortestIndent :: [String] -> Int +   shortestIndent = minimum +                    . map (length . takeWhile isSpace) +                    . filter (not . null) + +   tabsToSpaces :: Int -> String -> String +   tabsToSpaces _      []         = [] +   tabsToSpaces tabLen cs'@(c:cs) = +       case c of +         ' '  -> ' ':tabsToSpaces tabLen cs +         '\t' -> (take tabLen $ repeat ' ') ++ tabsToSpaces tabLen cs +         _    -> cs' + +   commaEscaped :: String -> String +   commaEscaped (',':cs@('*':_))     = cs +   commaEscaped (',':cs@('#':'+':_)) = cs +   commaEscaped (' ':cs)             = ' ':commaEscaped cs +   commaEscaped ('\t':cs)            = '\t':commaEscaped cs +   commaEscaped cs                   = cs + +-- | Read but ignore all remaining block headers.  ignHeaders :: OrgParser ()  ignHeaders = (() <$ newline) <|> (() <$ anyLine) -divWithClass :: String -> Blocks -> Blocks -divWithClass cls = B.divWith ("", [cls], []) +-- | Read a block containing code intended for export in specific backends +-- only. +exportBlock :: String -> OrgParser (F Blocks) +exportBlock blockType = try $ do +  exportType <- skipSpaces *> orgArgWord <* ignHeaders +  contents   <- rawBlockContent blockType +  returnF (B.rawBlock (map toLower exportType) contents) -verseBlock :: BlockProperties -> OrgParser (F Blocks) -verseBlock blkProp = try $ do +verseBlock :: String -> OrgParser (F Blocks) +verseBlock blockType = try $ do    ignHeaders -  content <- rawBlockContent blkProp +  content <- rawBlockContent blockType    fmap B.para . mconcat . intersperse (pure B.linebreak) -    <$> mapM (parseFromString parseInlines) (map (++ "\n") . lines $ content) - -exportsCode :: [(String, String)] -> Bool -exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs -                         || ("rundoc-exports", "results") `elem` attrs) - -exportsResults :: [(String, String)] -> Bool -exportsResults attrs = ("rundoc-exports", "results") `elem` attrs -                       || ("rundoc-exports", "both") `elem` attrs - -followingResultsBlock :: OrgParser (Maybe (F Blocks)) -followingResultsBlock = -       optionMaybe (try $ blanklines *> stringAnyCase "#+RESULTS:" -                                     *> blankline -                                     *> block) +    <$> mapM (parseFromString inlines) (map (++ "\n") . lines $ content) -codeBlock :: BlockAttributes -> BlockProperties -> OrgParser (F Blocks) -codeBlock blockAttrs blkProp = do +-- | Read a code block and the associated results block if present.  Which of +-- boths blocks is included in the output is determined using the "exports" +-- argument in the block header. +codeBlock :: BlockAttributes -> String -> OrgParser (F Blocks) +codeBlock blockAttrs blockType = do    skipSpaces    (classes, kv)     <- codeHeaderArgs <|> (mempty <$ ignHeaders) -  leadingIndent     <- lookAhead indentation -  content           <- rawBlockContent (updateIndent blkProp leadingIndent) -  resultsContent    <- followingResultsBlock +  content           <- rawBlockContent blockType +  resultsContent    <- trailingResultsBlock    let id'            = fromMaybe mempty $ blockAttrName blockAttrs    let includeCode    = exportsCode kv    let includeResults = exportsResults kv @@ -248,36 +272,31 @@ codeBlock blockAttrs blkProp = do                               (labelDiv codeBlck)                               (blockAttrCaption blockAttrs)    let resultBlck     = fromMaybe mempty resultsContent -  return $ (if includeCode then labelledBlck else mempty) -           <> (if includeResults then resultBlck else mempty) +  return $ +    (if includeCode    then labelledBlck else mempty) <> +    (if includeResults then resultBlck   else mempty)   where +   labelDiv :: Blocks -> F Inlines -> F Blocks     labelDiv blk value = -       B.divWith nullAttr <$> (mappend <$> labelledBlock value -                                       <*> pure blk) -   labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) +     B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk) -rawBlockContent :: BlockProperties -> OrgParser String -rawBlockContent (indent, blockType) = try $ -  unlines . map commaEscaped <$> manyTill indentedLine blockEnder - where -   indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine) -   blockEnder = try $ skipSpaces *> stringAnyCase ("#+end_" <> blockType) +   labelledBlock :: F Inlines -> F Blocks +   labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) -parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) -parsedBlockContent blkProps = try $ do -  raw <- rawBlockContent blkProps -  parseFromString blocks (raw ++ "\n") +exportsCode :: [(String, String)] -> Bool +exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs +                         || ("rundoc-exports", "results") `elem` attrs) --- indent by specified number of spaces (or equiv. tabs) -indentWith :: Int -> OrgParser String -indentWith num = do -  tabStop <- getOption readerTabStop -  if num < tabStop -     then count num (char ' ') -     else choice [ try (count num (char ' ')) -                 , try (char '\t' >> count (num - tabStop) (char ' ')) ] +exportsResults :: [(String, String)] -> Bool +exportsResults attrs = ("rundoc-exports", "results") `elem` attrs +                       || ("rundoc-exports", "both") `elem` attrs -type SwitchOption = (Char, Maybe String) +trailingResultsBlock :: OrgParser (Maybe (F Blocks)) +trailingResultsBlock = optionMaybe . try $ do +  blanklines +  stringAnyCase "#+RESULTS:" +  blankline +  block  -- | Parse code block arguments  -- TODO: We currently don't handle switches. @@ -297,8 +316,7 @@ codeHeaderArgs = try $ do     hasRundocParameters = not . null     toRundocAttrib = first ("rundoc-" ++) - -switch :: OrgParser SwitchOption +switch :: OrgParser (Char, Maybe String)  switch = try $ simpleSwitch <|> lineNumbersSwitch   where     simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter) @@ -334,24 +352,9 @@ orgParamValue :: OrgParser String  orgParamValue = try $    skipSpaces      *> notFollowedBy (char ':' ) -    *> many1 (noneOf "\t\n\r ") +    *> many1 nonspaceChar      <* skipSpaces -commaEscaped :: String -> String -commaEscaped (',':cs@('*':_))     = cs -commaEscaped (',':cs@('#':'+':_)) = cs -commaEscaped cs                   = cs - -example :: OrgParser (F Blocks) -example = try $ do -  return . return . exampleCode =<< unlines <$> many1 exampleLine - -exampleCode :: String -> Blocks -exampleCode = B.codeBlockWith ("", ["example"], []) - -exampleLine :: OrgParser String -exampleLine = try $ exampleLineStart *> anyLine -  horizontalRule :: OrgParser (F Blocks)  horizontalRule = return B.horizontalRule <$ try hline @@ -444,18 +447,26 @@ figure = try $ do     selfTarget :: OrgParser String     selfTarget = try $ char '[' *> linkTarget <* char ']' +-- +-- Examples +-- + +-- | Example code marked up by a leading colon. +example :: OrgParser (F Blocks) +example = try $ do +  return . return . exampleCode =<< unlines <$> many1 exampleLine + where +   exampleLine :: OrgParser String +   exampleLine = try $ exampleLineStart *> anyLine + +exampleCode :: String -> Blocks +exampleCode = B.codeBlockWith ("", ["example"], []) +  --  -- Comments, Options and Metadata  -- -addLinkFormat :: String -              -> (String -> String) -              -> OrgParser () -addLinkFormat key formatter = updateState $ \s -> -  let fs = orgStateLinkFormatters s -  in s{ orgStateLinkFormatters = M.insert key formatter fs } -  specialLine :: OrgParser (F Blocks)  specialLine = fmap return . try $ metaLine <|> commentLine @@ -492,6 +503,14 @@ optionLine = try $ do      "options" -> () <$ sepBy spaces exportSetting      _         -> mzero +addLinkFormat :: String +              -> (String -> String) +              -> OrgParser () +addLinkFormat key formatter = updateState $ \s -> +  let fs = orgStateLinkFormatters s +  in s{ orgStateLinkFormatters = M.insert key formatter fs } + +  --  -- Export Settings  -- @@ -618,10 +637,10 @@ header = try $ do    title    <- manyTill inline (lookAhead $ optional headerTags <* newline)    tags     <- option [] headerTags    newline +  let text = tagTitle title tags    propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer) -  inlines  <- runF (tagTitle title tags) <$> getState -  attr     <- registerHeader propAttr inlines -  return $ pure (B.headerWith attr level inlines) +  attr     <- registerHeader propAttr (runF text def) +  return (B.headerWith attr level <$> text)   where     tagTitle :: [F Inlines] -> [String] -> F Inlines     tagTitle title tags = trimInlinesF . mconcat $ title <> map tagToInlineF tags @@ -799,7 +818,7 @@ noteBlock = try $ do  -- Paragraphs or Plain text  paraOrPlain :: OrgParser (F Blocks)  paraOrPlain = try $ do -  ils <- parseInlines +  ils <- inlines    nl <- option False (newline *> return True)    -- Read block as paragraph, except if we are in a list context and the block    -- is directly followed by a list item, in which case the block is read as @@ -858,7 +877,7 @@ definitionListItem parseMarkerGetLength = try $ do    line1 <- anyLineNewline    blank <- option "" ("\n" <$ blankline)    cont <- concat <$> many (listContinuation markerLength) -  term' <- parseFromString parseInlines term +  term' <- parseFromString inlines term    contents' <- parseFromString blocks $ line1 ++ blank ++ cont    return $ (,) <$> term' <*> fmap (:[]) contents'   where @@ -884,7 +903,17 @@ listContinuation markerLength = try $    notFollowedBy' blankline    *> (mappend <$> (concat <$> many1 listLine)                <*> many blankline) - where listLine = try $ indentWith markerLength *> anyLineNewline + where +   listLine = try $ indentWith markerLength *> anyLineNewline + +   -- indent by specified number of spaces (or equiv. tabs) +   indentWith :: Int -> OrgParser String +   indentWith num = do +     tabStop <- getOption readerTabStop +     if num < tabStop +       then count num (char ' ') +       else choice [ try (count num (char ' ')) +                   , try (char '\t' >> count (num - tabStop) (char ' ')) ]  -- | Parse any line, include the final newline in the output.  anyLineNewline :: OrgParser String diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 0c3840979..a122c334a 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -28,8 +28,8 @@ Parsers for Org-mode inline elements.  -}  module Text.Pandoc.Readers.Org.Inlines    ( inline +  , inlines    , addToNotesTable -  , parseInlines    , isImageFilename    , linkTarget    ) where @@ -145,8 +145,9 @@ inline =           ] <* (guard =<< newlinesCountWithinLimits)    <?> "inline" -parseInlines :: OrgParser (F Inlines) -parseInlines = trimInlinesF . mconcat <$> many1 inline +-- | Read the rest of the input as inlines. +inlines :: OrgParser (F Inlines) +inlines = trimInlinesF . mconcat <$> many1 inline  -- treat these as potentially non-text when parsing inline:  specialChars :: [Char] | 
