From 0558ea9836f5350a00f3df19d55ca6bddda1947b Mon Sep 17 00:00:00 2001 From: Brian Leung <29217594+leungbk@users.noreply.github.com> Date: Mon, 9 Sep 2019 07:34:10 +0200 Subject: Org reader: modify handling of example blocks. (#5717) * Org reader: allow the `-i` switch to ignore leading spaces. * Org reader: handle awkwardly-aligned code blocks within lists. Code blocks in Org lists must have their #+BEGIN_ aligned in a reasonable way, but their other components can be positioned otherwise. --- src/Text/Pandoc/Readers/Org/Blocks.hs | 55 ++++++++++++++++++++------- src/Text/Pandoc/Readers/Org/ParserState.hs | 2 + test/command/4186.md | 60 ++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 14 deletions(-) create mode 100644 test/command/4186.md diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 8ee9c025d..cba876f06 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -186,7 +186,7 @@ orgBlock = try $ do "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) "latex" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) "ascii" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) - "example" -> rawBlockLines (return . exampleCode) + "example" -> exampleBlock blockAttrs "quote" -> parseBlockLines (fmap B.blockQuote) "verse" -> verseBlock "src" -> codeBlock blockAttrs @@ -200,6 +200,16 @@ orgBlock = try $ do lowercase :: String -> String lowercase = map toLower +exampleBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) +exampleBlock blockAttrs _label = do + skipSpaces + (classes, kv) <- switchesAsAttributes + newline + content <- rawBlockContent "example" + let id' = fromMaybe mempty $ blockAttrName blockAttrs + let codeBlck = B.codeBlockWith (id', "example":classes, kv) content + return . return $ codeBlck + rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks) rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType) @@ -216,11 +226,13 @@ rawBlockContent :: Monad m => String -> OrgParser m String rawBlockContent blockType = try $ do blkLines <- manyTill rawLine blockEnder tabLen <- getOption readerTabStop - return - . unlines - . stripIndent - . map (tabsToSpaces tabLen . commaEscaped) - $ blkLines + trimP <- orgStateTrimLeadBlkIndent <$> getState + let stripIndent strs = if trimP then map (drop (shortestIndent strs)) strs else strs + (unlines + . stripIndent + . map (tabsToSpaces tabLen . commaEscaped) + $ blkLines) + <$ updateState (\s -> s { orgStateTrimLeadBlkIndent = True }) where rawLine :: Monad m => OrgParser m String rawLine = try $ ("" <$ blankline) <|> anyLine @@ -228,9 +240,6 @@ rawBlockContent blockType = try $ do blockEnder :: Monad m => OrgParser m () blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType) - stripIndent :: [String] -> [String] - stripIndent strs = map (drop (shortestIndent strs)) strs - shortestIndent :: [String] -> Int shortestIndent = foldr (min . length . takeWhile isSpace) maxBound . filter (not . null) @@ -357,12 +366,19 @@ switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+') -- | Parses a source block switch option. switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) -switch = try $ lineNumberSwitch <|> labelSwitch <|> simpleSwitch +switch = try $ lineNumberSwitch <|> labelSwitch + <|> whitespaceSwitch <|> simpleSwitch where simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter labelSwitch = genericSwitch 'l' $ char '"' *> many1Till nonspaceChar (char '"') +whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) +whitespaceSwitch = do + string "-i" + updateState $ \s -> s { orgStateTrimLeadBlkIndent = False } + return ('i', Nothing, SwitchMinus) + -- | Generic source block switch-option parser. genericSwitch :: Monad m => Char @@ -821,11 +837,22 @@ listItem parseIndentedMarker = try . withContext ListItemState $ do -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. -listContinuation :: Monad m => Int - -> OrgParser m String +listContinuation :: PandocMonad m => Int -> OrgParser m String listContinuation markerLength = try $ do notFollowedBy' blankline - mappend <$> (concat <$> many1 listLine) + mappend <$> (concat <$> many1 (listContinuation' markerLength)) <*> many blankline where - listLine = try $ indentWith markerLength *> anyLineNewline + listContinuation' indentation = + blockLines indentation <|> listLine indentation + listLine indentation = try $ indentWith indentation *> anyLineNewline + -- The block attributes and start must be appropriately indented, + -- but the contents, and end do not. + blockLines indentation = + try $ lookAhead (indentWith indentation + >> blockAttributes + >>= (\blockAttrs -> + case attrFromBlockAttributes blockAttrs of + ("", [], []) -> count 1 anyChar + _ -> indentWith indentation)) + >> (snd <$> withRaw orgBlock) diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 374741893..d6dde8b22 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -117,6 +117,7 @@ data OrgParserState = OrgParserState , orgStateSelectTags :: Set.Set Tag , orgStateSelectTagsChanged :: Bool , orgStateTodoSequences :: [TodoSequence] + , orgStateTrimLeadBlkIndent :: Bool , orgLogMessages :: [LogMessage] , orgMacros :: M.Map Text Macro } @@ -184,6 +185,7 @@ defaultOrgParserState = OrgParserState , orgStateParserContext = NullState , orgStateSelectTags = Set.singleton $ Tag "export" , orgStateSelectTagsChanged = False + , orgStateTrimLeadBlkIndent = True , orgStateTodoSequences = [] , orgLogMessages = [] , orgMacros = M.empty diff --git a/test/command/4186.md b/test/command/4186.md new file mode 100644 index 000000000..2345b0350 --- /dev/null +++ b/test/command/4186.md @@ -0,0 +1,60 @@ +``` +% pandoc -f org -t native +#+BEGIN_EXAMPLE -i + This should retain the four leading spaces +#+END_EXAMPLE +^D +[CodeBlock ("",["example"],[]) " This should retain the four leading spaces\n"] +``` + +``` +% pandoc -f org -t html +- depth 1 + #+NAME: bob + #+BEGIN_EXAMPLE -i + Vertical alignment is four spaces beyond the appearance of the word "depth". + #+END_EXAMPLE + - depth 2 + #+begin_example + Vertically aligned with the second appearance of the word "depth". + #+end_example + #+begin_example -i + Vertical alignment is four spaces beyond the second + appearance of the word "depth". + The "begin" portion is a component of + this deeper list element, so that guarantees + that the entire block must be a component of the + inner list element. + #+end_example + Still inside the inner list element + #+NAME: carrie + #+BEGIN_EXAMPLE + This belongs to the outer list element, and is aligned accordingly, since the NAME attribute is not indented deeply enough. It is not enough for the BEGIN alone to be aligned deeply if the block is meant to have a NAME. + #+END_EXAMPLE + Still in the shallower list element since the preceding example + block forced the deeper list element to terminate. +Outside all lists. +^D + +

Outside all lists.

+``` -- cgit v1.2.3