From 2cdfa5eb207ca07d96235eb018928812f21da593 Mon Sep 17 00:00:00 2001 From: Daniel Bergey Date: Mon, 1 Dec 2014 16:27:51 +0000 Subject: parse RST quoted literal blocks closes #65 RST quoted literal blocks are the same as indented literal blocks (which pandoc already supports) except that the quote character is preserved in each line. This includes test cases for the quoted literal block, as well as additional tests for line blocks and indented literal blocks, to verify that these are unaffected by the changes. --- src/Text/Pandoc/Readers/RST.hs | 10 +++++++++- tests/Tests/Readers/RST.hs | 17 ++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index e5eccb116..dbbfe5191 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -335,6 +335,13 @@ indentedBlock = try $ do optional blanklines return $ unlines lns +quotedBlock :: Parser [Char] st [Char] +quotedBlock = try $ do + quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" + lns <- many1 $ lookAhead (char quote) >> anyLine + optional blanklines + return $ unlines lns + codeBlockStart :: Parser [Char] st Char codeBlockStart = string "::" >> blankline >> blankline @@ -342,7 +349,8 @@ codeBlock :: Parser [Char] st Blocks codeBlock = try $ codeBlockStart >> codeBlockBody codeBlockBody :: Parser [Char] st Blocks -codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> indentedBlock +codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> + (indentedBlock <|> quotedBlock) lhsCodeBlock :: RSTParser Blocks lhsCodeBlock = try $ do diff --git a/tests/Tests/Readers/RST.hs b/tests/Tests/Readers/RST.hs index a80dc32b7..4b0abdf72 100644 --- a/tests/Tests/Readers/RST.hs +++ b/tests/Tests/Readers/RST.hs @@ -67,5 +67,20 @@ tests = [ "line block with blank line" =: link "http://foo.bar.baz" "" "http://foo.bar.baz" <> ". " <> link "http://foo.bar/baz_(bam)" "" "http://foo.bar/baz_(bam)" <> " (" <> link "http://foo.bar" "" "http://foo.bar" <> ")") + , "indented literal block" =: unlines + [ "::" + , "" + , " block quotes" + , "" + , " can go on for many lines" + , "but must stop here"] + =?> (doc $ + codeBlock "block quotes\n\ncan go on for many lines" <> + para "but must stop here") + , "line block with 3 lines" =: "| a\n| b\n| c" + =?> para ("a" <> linebreak <> "b" <> linebreak <> "c") + , "quoted literal block using >" =: "::\n\n> quoted\n> block\n\nOrdinary paragraph" + =?> codeBlock "> quoted\n> block" <> para "Ordinary paragraph" + , "quoted literal block using | (not a line block)" =: "::\n\n| quoted\n| block\n\nOrdinary paragraph" + =?> codeBlock "| quoted\n| block" <> para "Ordinary paragraph" ] - -- cgit v1.2.3 From 74c1b547c247bbe2adab0fc6857cd78eaef918c3 Mon Sep 17 00:00:00 2001 From: Daniel Bergey Date: Mon, 1 Dec 2014 18:18:34 +0000 Subject: parse RST class directives The class directive accepts one or more class names, and creates a Div value with those classes. If the directive has an indented body, the body is parsed as the children of the Div. If not, the first block folowing the directive is made a child of the Div. This differs from the behavior of rst2xml, which does not create a Div element. Instead, the specified classes are applied to each child of the directive. However, most Pandoc Block constructors to not take an Attr argument, so we can't duplicate this behavior. --- src/Text/Pandoc/Readers/RST.hs | 10 ++++++++-- tests/Tests/Readers/RST.hs | 6 ++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index dbbfe5191..732956981 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -47,7 +47,7 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) import qualified Text.Pandoc.Builder as B import Data.Monoid (mconcat, mempty) import Data.Sequence (viewr, ViewR(..)) -import Data.Char (toLower, isHexDigit) +import Data.Char (toLower, isHexDigit, isSpace) -- | Parse reStructuredText string and return Pandoc document. readRST :: ReaderOptions -- ^ Reader options @@ -521,7 +521,6 @@ directive = try $ do -- TODO: line-block, parsed-literal, table, csv-table, list-table -- date -- include --- class -- title directive' :: RSTParser Blocks directive' = do @@ -602,6 +601,13 @@ directive' = do Just t -> B.link (escapeURI $ trim t) "" $ B.image src "" alt Nothing -> B.image src "" alt + "class" -> do + let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields) + -- directive content or the first immediately following element + children <- case body of + "" -> block + _ -> parseFromString parseBlocks body' + return $ B.divWith attrs children _ -> return mempty -- TODO: diff --git a/tests/Tests/Readers/RST.hs b/tests/Tests/Readers/RST.hs index 4b0abdf72..c97dcb149 100644 --- a/tests/Tests/Readers/RST.hs +++ b/tests/Tests/Readers/RST.hs @@ -83,4 +83,10 @@ tests = [ "line block with blank line" =: =?> codeBlock "> quoted\n> block" <> para "Ordinary paragraph" , "quoted literal block using | (not a line block)" =: "::\n\n| quoted\n| block\n\nOrdinary paragraph" =?> codeBlock "| quoted\n| block" <> para "Ordinary paragraph" + , "class directive with single paragraph" =: ".. class:: special\n\nThis is a \"special\" paragraph." + =?> divWith ("", ["special"], []) (para "This is a \"special\" paragraph.") + , "class directive with two paragraphs" =: ".. class:: exceptional remarkable\n\n First paragraph.\n\n Second paragraph." + =?> divWith ("", ["exceptional", "remarkable"], []) (para "First paragraph." <> para "Second paragraph.") + , "class directive around literal block" =: ".. class:: classy\n\n::\n\n a\n b" + =?> divWith ("", ["classy"], []) (codeBlock "a\nb") ] -- cgit v1.2.3