Org reader: Support more types of '#+BEGIN_<type>' blocks

Support for standard org-blocks is improved. The parser now handles "HTML", "LATEX", "ASCII", "EXAMPLE", "QUOTE" and "VERSE" blocks in a sensible fashion.
author: Albert Krewinkel <tarleb@moltkeplatz.de> 2014-04-17 18:09:27 +0200
committer: Albert Krewinkel <tarleb@moltkeplatz.de> 2014-04-17 18:33:39 +0200
commit: 6d6724cf2c6ae6bcc0df312c476e45644c972a85 (patch)
tree: 3a56f4e0a59f931591329028e3d7244a4de2d9be
parent: 0672f58a445c289c58e42cffbbf32a273e801e39 (diff)
download: pandoc-6d6724cf2c6ae6bcc0df312c476e45644c972a85.tar.gz
2 files changed, 108 insertions, 30 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 17f8a1c9e..88e81f5fc 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -37,6 +37,7 @@ import           Text.Pandoc.Options
 import qualified Text.Pandoc.Parsing as P
 import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
                                             , newline, orderedListMarker
+                                            , parseFromString
                                             , updateLastStrPos )
 import           Text.Pandoc.Shared (compactify')
 
@@ -47,7 +48,7 @@ import           Control.Monad (foldM, guard, liftM, liftM2, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks)
 import           Data.Char (toLower)
 import           Data.Default
-import           Data.List (isPrefixOf, isSuffixOf)
+import           Data.List (intersperse, isPrefixOf, isSuffixOf)
 import           Data.Maybe (listToMaybe, fromMaybe)
 import           Data.Monoid (Monoid, mconcat, mempty, mappend)
 
@@ -156,6 +157,16 @@ addToNotesTable note = do
   oldnotes <- orgStateNotes' <$> getState
   updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
 
+-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
+-- of the state saved and restored.
+parseFromString :: OrgParser a -> String -> OrgParser a
+parseFromString parser str' = do
+  oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
+  updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
+  result <- P.parseFromString parser str'
+  updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
+  return result
+
 
 --
 -- Adaptions and specializations of parsing utilities
@@ -218,13 +229,27 @@ block = choice [ mempty <$ blanklines
 orgBlock :: OrgParser (F Blocks)
 orgBlock = try $ do
   (indent, blockType, args) <- blockHeader
-  blockStr <- rawBlockContent indent blockType
+  content <- rawBlockContent indent blockType
+  contentBlocks <- parseFromString parseBlocks (content ++ "\n")
   let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
   case blockType of
     "comment" -> return mempty
-    "src"     -> return . return $ B.codeBlockWith ("", classArgs, []) blockStr
-    _         -> fmap (B.divWith ("", [blockType], []))
-                 <$> parseFromString parseBlocks blockStr
+    "src"     -> returnF $ B.codeBlockWith ("", classArgs, []) content
+    "html"    -> returnF $ B.rawBlock "html" content
+    "latex"   -> returnF $ B.rawBlock "latex" content
+    "ascii"   -> returnF $ B.rawBlock "ascii" content
+    "example" -> returnF $ exampleCode content
+    "quote"   -> return  $ B.blockQuote <$> contentBlocks
+    "verse"   -> parseVerse content
+    _         -> return  $ B.divWith ("", [blockType], []) <$> contentBlocks
+ where
+   returnF :: a -> OrgParser (F a)
+   returnF = return . return
+
+   parseVerse :: String -> OrgParser (F Blocks)
+   parseVerse cs =
+       fmap B.para . mconcat . intersperse (pure B.linebreak)
+       <$> mapM (parseFromString parseInlines) (lines cs)
 
 blockHeader :: OrgParser (Int, String, [String])
 blockHeader = (,,) <$> blockIndent
@@ -270,8 +295,10 @@ commaEscaped cs                   = cs
 
 example :: OrgParser (F Blocks)
 example = try $ do
-  body <- unlines <$> many1 exampleLine
-  return . return $ B.codeBlockWith ("", ["example"], []) body
+  return . return . exampleCode =<< unlines <$> many1 exampleLine
+
+exampleCode :: String -> Blocks
+exampleCode = B.codeBlockWith ("", ["example"], [])
 
 exampleLine :: OrgParser String
 exampleLine = try $ string ": " *> anyLine
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 7f9c5f1d5..7d5bfe650 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -363,29 +363,6 @@ tests =
                   , "#+END_COMMENT"] =?>
           (mempty::Blocks)
 
-      , "Source Block in Text" =:
-          unlines [ "Low German greeting"
-                  , "  #+BEGIN_SRC haskell"
-                  , "  main = putStrLn greeting"
-                  , "    where greeting = \"moin\""
-                  , "  #+END_SRC" ] =?>
-          let attr' = ("", ["haskell"], [])
-              code' = "main = putStrLn greeting\n" ++
-                       "  where greeting = \"moin\"\n"
-          in mconcat [ para $ spcSep [ "Low", "German", "greeting"  ]
-                     , codeBlockWith attr' code'
-                     ]
-
-      , "Source Block" =:
-          unlines [ "  #+BEGIN_SRC haskell"
-                  , "  main = putStrLn greeting"
-                  , "    where greeting = \"moin\""
-                  , "  #+END_SRC" ] =?>
-          let attr' = ("", ["haskell"], [])
-              code' = "main = putStrLn greeting\n" ++
-                       "  where greeting = \"moin\"\n"
-          in codeBlockWith attr' code'
-
       , "Figure" =:
           unlines [ "#+caption: A very courageous man."
                   , "#+name: goodguy"
@@ -661,4 +638,78 @@ tests =
                 , [ plain "2"      , plain mempty , plain mempty  ]
                 ]
       ]
+
+    , testGroup "Blocks"
+      [ "Source block" =:
+           unlines [ "  #+BEGIN_SRC haskell"
+                   , "  main = putStrLn greeting"
+                   , "    where greeting = \"moin\""
+                   , "  #+END_SRC" ] =?>
+           let attr' = ("", ["haskell"], [])
+               code' = "main = putStrLn greeting\n" ++
+                       "  where greeting = \"moin\"\n"
+           in codeBlockWith attr' code'
+
+      , "Source block between paragraphs" =:
+           unlines [ "Low German greeting"
+                   , "  #+BEGIN_SRC haskell"
+                   , "  main = putStrLn greeting"
+                   , "    where greeting = \"Moin!\""
+                   , "  #+END_SRC" ] =?>
+           let attr' = ("", ["haskell"], [])
+               code' = "main = putStrLn greeting\n" ++
+                        "  where greeting = \"Moin!\"\n"
+           in mconcat [ para $ spcSep [ "Low", "German", "greeting"  ]
+                      , codeBlockWith attr' code'
+                      ]
+
+      , "Example block" =:
+           unlines [ "#+begin_example"
+                   , "A chosen representation of"
+                   , "a rule."
+                   , "#+eND_exAMPle"
+                   ] =?>
+           codeBlockWith ("", ["example"], [])
+                         "A chosen representation of\na rule.\n"
+
+      , "HTML block" =:
+           unlines [ "#+BEGIN_HTML"
+                   , "<aside>HTML5 is pretty nice.</aside>"
+                   , "#+END_HTML"
+                   ] =?>
+           rawBlock "html" "<aside>HTML5 is pretty nice.</aside>\n"
+
+      , "Quote block" =:
+           unlines [ "#+BEGIN_QUOTE"
+                   , "/Niemand/ hat die Absicht, eine Mauer zu errichten!"
+                   , "#+END_QUOTE"
+                   ] =?>
+           blockQuote (para (spcSep [ emph "Niemand", "hat", "die", "Absicht,"
+                                    , "eine", "Mauer", "zu", "errichten!"
+                                    ]))
+
+      , "Verse block" =:
+          unlines [ "The first lines of Goethe's /Faust/:"
+                  , "#+begin_verse"
+                  , "Habe nun, ach! Philosophie,"
+                  , "Juristerei und Medizin,"
+                  , "Und leider auch Theologie!"
+                  , "Durchaus studiert, mit heißem Bemühn."
+                  , "#+end_verse"
+                  ] =?>
+          mconcat
+          [ para $ spcSep [ "The", "first", "lines", "of"
+                          , "Goethe's", emph "Faust" <> ":"]
+          , para $ mconcat
+              [ spcSep [ "Habe", "nun,", "ach!", "Philosophie," ]
+              , linebreak
+              , spcSep [ "Juristerei", "und", "Medizin," ]
+              , linebreak
+              , spcSep [ "Und", "leider", "auch", "Theologie!" ]
+              , linebreak
+              , spcSep [ "Durchaus", "studiert,", "mit", "heißem", "Bemühn." ]
+              ]
+          ]
+
+      ]
   ]
author	Albert Krewinkel <tarleb@moltkeplatz.de>	2014-04-17 18:09:27 +0200
committer	Albert Krewinkel <tarleb@moltkeplatz.de>	2014-04-17 18:33:39 +0200
commit	6d6724cf2c6ae6bcc0df312c476e45644c972a85 (patch)
tree	3a56f4e0a59f931591329028e3d7244a4de2d9be
parent	0672f58a445c289c58e42cffbbf32a273e801e39 (diff)
download	pandoc-6d6724cf2c6ae6bcc0df312c476e45644c972a85.tar.gz