diff options
author | Albert Krewinkel <tarleb@moltkeplatz.de> | 2014-04-17 18:09:27 +0200 |
---|---|---|
committer | Albert Krewinkel <tarleb@moltkeplatz.de> | 2014-04-17 18:33:39 +0200 |
commit | 6d6724cf2c6ae6bcc0df312c476e45644c972a85 (patch) | |
tree | 3a56f4e0a59f931591329028e3d7244a4de2d9be | |
parent | 0672f58a445c289c58e42cffbbf32a273e801e39 (diff) | |
download | pandoc-6d6724cf2c6ae6bcc0df312c476e45644c972a85.tar.gz |
Org reader: Support more types of '#+BEGIN_<type>' blocks
Support for standard org-blocks is improved. The parser now handles
"HTML", "LATEX", "ASCII", "EXAMPLE", "QUOTE" and "VERSE" blocks in a
sensible fashion.
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 41 | ||||
-rw-r--r-- | tests/Tests/Readers/Org.hs | 97 |
2 files changed, 108 insertions, 30 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 17f8a1c9e..88e81f5fc 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -37,6 +37,7 @@ import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF , newline, orderedListMarker + , parseFromString , updateLastStrPos ) import Text.Pandoc.Shared (compactify') @@ -47,7 +48,7 @@ import Control.Monad (foldM, guard, liftM, liftM2, when) import Control.Monad.Reader (Reader, runReader, ask, asks) import Data.Char (toLower) import Data.Default -import Data.List (isPrefixOf, isSuffixOf) +import Data.List (intersperse, isPrefixOf, isSuffixOf) import Data.Maybe (listToMaybe, fromMaybe) import Data.Monoid (Monoid, mconcat, mempty, mappend) @@ -156,6 +157,16 @@ addToNotesTable note = do oldnotes <- orgStateNotes' <$> getState updateState $ \s -> s{ orgStateNotes' = note:oldnotes } +-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts +-- of the state saved and restored. +parseFromString :: OrgParser a -> String -> OrgParser a +parseFromString parser str' = do + oldLastPreCharPos <- orgStateLastPreCharPos <$> getState + updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } + result <- P.parseFromString parser str' + updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos } + return result + -- -- Adaptions and specializations of parsing utilities @@ -218,13 +229,27 @@ block = choice [ mempty <$ blanklines orgBlock :: OrgParser (F Blocks) orgBlock = try $ do (indent, blockType, args) <- blockHeader - blockStr <- rawBlockContent indent blockType + content <- rawBlockContent indent blockType + contentBlocks <- parseFromString parseBlocks (content ++ "\n") let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ] case blockType of "comment" -> return mempty - "src" -> return . return $ B.codeBlockWith ("", classArgs, []) blockStr - _ -> fmap (B.divWith ("", [blockType], [])) - <$> parseFromString parseBlocks blockStr + "src" -> returnF $ B.codeBlockWith ("", classArgs, []) content + "html" -> returnF $ B.rawBlock "html" content + "latex" -> returnF $ B.rawBlock "latex" content + "ascii" -> returnF $ B.rawBlock "ascii" content + "example" -> returnF $ exampleCode content + "quote" -> return $ B.blockQuote <$> contentBlocks + "verse" -> parseVerse content + _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks + where + returnF :: a -> OrgParser (F a) + returnF = return . return + + parseVerse :: String -> OrgParser (F Blocks) + parseVerse cs = + fmap B.para . mconcat . intersperse (pure B.linebreak) + <$> mapM (parseFromString parseInlines) (lines cs) blockHeader :: OrgParser (Int, String, [String]) blockHeader = (,,) <$> blockIndent @@ -270,8 +295,10 @@ commaEscaped cs = cs example :: OrgParser (F Blocks) example = try $ do - body <- unlines <$> many1 exampleLine - return . return $ B.codeBlockWith ("", ["example"], []) body + return . return . exampleCode =<< unlines <$> many1 exampleLine + +exampleCode :: String -> Blocks +exampleCode = B.codeBlockWith ("", ["example"], []) exampleLine :: OrgParser String exampleLine = try $ string ": " *> anyLine diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 7f9c5f1d5..7d5bfe650 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -363,29 +363,6 @@ tests = , "#+END_COMMENT"] =?> (mempty::Blocks) - , "Source Block in Text" =: - unlines [ "Low German greeting" - , " #+BEGIN_SRC haskell" - , " main = putStrLn greeting" - , " where greeting = \"moin\"" - , " #+END_SRC" ] =?> - let attr' = ("", ["haskell"], []) - code' = "main = putStrLn greeting\n" ++ - " where greeting = \"moin\"\n" - in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] - , codeBlockWith attr' code' - ] - - , "Source Block" =: - unlines [ " #+BEGIN_SRC haskell" - , " main = putStrLn greeting" - , " where greeting = \"moin\"" - , " #+END_SRC" ] =?> - let attr' = ("", ["haskell"], []) - code' = "main = putStrLn greeting\n" ++ - " where greeting = \"moin\"\n" - in codeBlockWith attr' code' - , "Figure" =: unlines [ "#+caption: A very courageous man." , "#+name: goodguy" @@ -661,4 +638,78 @@ tests = , [ plain "2" , plain mempty , plain mempty ] ] ] + + , testGroup "Blocks" + [ "Source block" =: + unlines [ " #+BEGIN_SRC haskell" + , " main = putStrLn greeting" + , " where greeting = \"moin\"" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"moin\"\n" + in codeBlockWith attr' code' + + , "Source block between paragraphs" =: + unlines [ "Low German greeting" + , " #+BEGIN_SRC haskell" + , " main = putStrLn greeting" + , " where greeting = \"Moin!\"" + , " #+END_SRC" ] =?> + let attr' = ("", ["haskell"], []) + code' = "main = putStrLn greeting\n" ++ + " where greeting = \"Moin!\"\n" + in mconcat [ para $ spcSep [ "Low", "German", "greeting" ] + , codeBlockWith attr' code' + ] + + , "Example block" =: + unlines [ "#+begin_example" + , "A chosen representation of" + , "a rule." + , "#+eND_exAMPle" + ] =?> + codeBlockWith ("", ["example"], []) + "A chosen representation of\na rule.\n" + + , "HTML block" =: + unlines [ "#+BEGIN_HTML" + , "<aside>HTML5 is pretty nice.</aside>" + , "#+END_HTML" + ] =?> + rawBlock "html" "<aside>HTML5 is pretty nice.</aside>\n" + + , "Quote block" =: + unlines [ "#+BEGIN_QUOTE" + , "/Niemand/ hat die Absicht, eine Mauer zu errichten!" + , "#+END_QUOTE" + ] =?> + blockQuote (para (spcSep [ emph "Niemand", "hat", "die", "Absicht," + , "eine", "Mauer", "zu", "errichten!" + ])) + + , "Verse block" =: + unlines [ "The first lines of Goethe's /Faust/:" + , "#+begin_verse" + , "Habe nun, ach! Philosophie," + , "Juristerei und Medizin," + , "Und leider auch Theologie!" + , "Durchaus studiert, mit heißem Bemühn." + , "#+end_verse" + ] =?> + mconcat + [ para $ spcSep [ "The", "first", "lines", "of" + , "Goethe's", emph "Faust" <> ":"] + , para $ mconcat + [ spcSep [ "Habe", "nun,", "ach!", "Philosophie," ] + , linebreak + , spcSep [ "Juristerei", "und", "Medizin," ] + , linebreak + , spcSep [ "Und", "leider", "auch", "Theologie!" ] + , linebreak + , spcSep [ "Durchaus", "studiert,", "mit", "heißem", "Bemühn." ] + ] + ] + + ] ] |