From 42ba781b3dcb15a3a8fd4169fa03eeb316f05125 Mon Sep 17 00:00:00 2001
From: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>
Date: Tue, 2 Dec 2008 22:41:51 +0000
Subject: Added literate haskell support for markdown reader.

+ Added stateLiterateHaskell to parser state.
+ Added parser for lhsCodeBlock to Markdown reader.
+ Added --lhs-in option, to treat input as literate haskell.
+ If first source has extension .lhs, assume --lhs-in.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1497 788f1e2b-df1e-0410-8736-df70ead52e1b
---
 Main.hs                         | 33 ++++++++++++++++++++++++---------
 Text/Pandoc/Readers/Markdown.hs | 27 +++++++++++++++++++++++++++
 Text/Pandoc/Shared.hs           |  2 ++
 3 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/Main.hs b/Main.hs
index 7853234e6..3237398ef 100644
--- a/Main.hs
+++ b/Main.hs
@@ -146,6 +146,7 @@ data Opt = Opt
     , optReferenceLinks    :: Bool    -- ^ Use reference links in writing markdown, rst
     , optWrapText          :: Bool    -- ^ Wrap text
     , optSanitizeHTML      :: Bool    -- ^ Sanitize HTML
+    , optLHSIn             :: Bool    -- ^ Treat input as literate haskell
 #ifdef _CITEPROC
     , optModsFile          :: String
     , optCslFile           :: String
@@ -179,6 +180,7 @@ defaultOpts = Opt
     , optReferenceLinks    = False
     , optWrapText          = True
     , optSanitizeHTML      = False
+    , optLHSIn             = False
 #ifdef _CITEPROC
     , optModsFile          = []
     , optCslFile           = []
@@ -288,6 +290,11 @@ options =
                   (\opt -> return opt { optSanitizeHTML = True }))
                  "" -- "Sanitize HTML"
 
+    , Option "" ["lhs-in"]
+                 (NoArg
+                  (\opt -> return opt { optLHSIn = True }))
+                 "" -- "Treat input as literate haskell"
+
     , Option "" ["toc", "table-of-contents"]
                 (NoArg
                  (\opt -> return opt { optTableOfContents = True }))
@@ -419,6 +426,11 @@ defaultReaderName (x:xs) =
     ".native"   -> "native"
     _           -> defaultReaderName xs
 
+-- Returns True if extension of first source is .lhs
+lhsExtension :: [FilePath] -> Bool
+lhsExtension (x:_) = takeExtension x == ".lhs"
+lhsExtension _ = False
+
 -- Determine default writer based on output file extension
 defaultWriterName :: FilePath -> String
 defaultWriterName "-" = "html" -- no output file
@@ -498,6 +510,7 @@ main = do
               , optReferenceLinks    = referenceLinks
               , optWrapText          = wrap
               , optSanitizeHTML      = sanitize
+              , optLHSIn             = lhsIn
 #ifdef _CITEPROC
              , optModsFile           = modsFile
              , optCslFile            = cslFile
@@ -556,17 +569,19 @@ main = do
 #endif
 
   let startParserState =
-         defaultParserState { stateParseRaw     = parseRaw,
-                              stateTabStop      = tabStop,
-                              stateSanitizeHTML = sanitize,
-                              stateStandalone   = standalone',
+         defaultParserState { stateParseRaw        = parseRaw,
+                              stateTabStop         = tabStop,
+                              stateSanitizeHTML    = sanitize,
+                              stateLiterateHaskell = lhsIn ||
+                                                     lhsExtension sources,
+                              stateStandalone      = standalone',
 #ifdef _CITEPROC
-                              stateCitations    = map citeKey refs,
+                              stateCitations       = map citeKey refs,
 #endif
-                              stateSmart        = smart || writerName' `elem`
-                                                           ["latex", "context"],
-                              stateColumns      = columns,
-                              stateStrict       = strict }
+                              stateSmart           = smart || writerName' `elem`
+                                                              ["latex", "context"],
+                              stateColumns         = columns,
+                              stateStrict          = strict }
   let csslink = if null css
                    then ""
                    else concatMap
diff --git a/Text/Pandoc/Readers/Markdown.hs b/Text/Pandoc/Readers/Markdown.hs
index e9f19e817..b2e0928c9 100644
--- a/Text/Pandoc/Readers/Markdown.hs
+++ b/Text/Pandoc/Readers/Markdown.hs
@@ -45,6 +45,7 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
                                   htmlBlockElement, unsanitaryURI )
 import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
 import Text.ParserCombinators.Parsec
+import Control.Monad (when)
 
 -- | Read markdown from an input string and return a Pandoc document.
 readMarkdown :: ParserState -> String -> Pandoc
@@ -102,6 +103,12 @@ failUnlessSmart = do
   state <- getState
   if stateSmart state then return () else fail "Smart typography feature"
 
+-- | Fail unless we're in literate haskell mode.
+failUnlessLHS :: GenParser tok ParserState ()
+failUnlessLHS = do
+  state <- getState
+  if stateLiterateHaskell state then return () else fail "Literate haskell feature"
+
 -- | Parse a sequence of inline elements between square brackets,
 -- including inlines between balanced pairs of square brackets.
 inlinesInBalancedBrackets :: GenParser Char ParserState Inline
@@ -268,6 +275,7 @@ block = do
                    , header 
                    , table
                    , codeBlockIndented
+                   , lhsCodeBlock
                    , blockQuote
                    , hrule
                    , bulletList
@@ -393,6 +401,25 @@ codeBlockIndented = do
   optional blanklines
   return $ CodeBlock ("",[],[]) $ stripTrailingNewlines $ concat contents
 
+lhsCodeBlock :: GenParser Char ParserState Block
+lhsCodeBlock = do
+  failUnlessLHS
+  pos <- getPosition
+  when (sourceColumn pos /= 1) $ fail "Not in first column"
+  lns <- many1 birdTrackLine
+  -- if (as is normal) there is always a space after >, drop it
+  let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
+                then map (drop 1) lns
+                else lns
+  blanklines
+  return $ CodeBlock ("",["haskell"],[]) $ intercalate "\n" lns'
+
+birdTrackLine :: GenParser Char st [Char]
+birdTrackLine = do
+  char '>'
+  manyTill anyChar newline
+
+
 --
 -- block quotes
 --
diff --git a/Text/Pandoc/Shared.hs b/Text/Pandoc/Shared.hs
index 8b08a0e94..0d137e9a9 100644
--- a/Text/Pandoc/Shared.hs
+++ b/Text/Pandoc/Shared.hs
@@ -635,6 +635,7 @@ data ParserState = ParserState
       stateDate            :: String,        -- ^ Date of document
       stateStrict          :: Bool,          -- ^ Use strict markdown syntax?
       stateSmart           :: Bool,          -- ^ Use smart typography?
+      stateLiterateHaskell :: Bool,          -- ^ Treat input as literate haskell
       stateColumns         :: Int,           -- ^ Number of columns in terminal
       stateHeaderTable     :: [HeaderType]   -- ^ Ordered list of header types used
     }
@@ -658,6 +659,7 @@ defaultParserState =
                   stateDate            = [],
                   stateStrict          = False,
                   stateSmart           = False,
+                  stateLiterateHaskell = False,
                   stateColumns         = 80,
                   stateHeaderTable     = [] }
 
-- 
cgit v1.2.3