diff options
Diffstat (limited to 'src')
52 files changed, 3279 insertions, 3026 deletions
diff --git a/src/Tests/Arbitrary.hs b/src/Tests/Arbitrary.hs deleted file mode 100644 index 9d65e1f1f..000000000 --- a/src/Tests/Arbitrary.hs +++ /dev/null @@ -1,190 +0,0 @@ -{-# OPTIONS_GHC -fno-warn-orphans #-} -{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, ScopedTypeVariables #-} --- provides Arbitrary instance for Pandoc types -module Tests.Arbitrary () -where -import Test.QuickCheck.Gen -import Test.QuickCheck.Arbitrary -import Control.Monad (liftM, liftM2) -import Text.Pandoc.Definition -import Text.Pandoc.Shared (normalize, escapeURI) -import Text.Pandoc.Builder - -realString :: Gen String -realString = resize 8 $ listOf $ frequency [ (9, elements [' '..'\127']) - , (1, elements ['\128'..'\9999']) ] - -arbAttr :: Gen Attr -arbAttr = do - id' <- elements ["","loc"] - classes <- elements [[],["haskell"],["c","numberLines"]] - keyvals <- elements [[],[("start","22")],[("a","11"),("b_2","a b c")]] - return (id',classes,keyvals) - -instance Arbitrary Inlines where - arbitrary = liftM (fromList :: [Inline] -> Inlines) arbitrary - -instance Arbitrary Blocks where - arbitrary = liftM (fromList :: [Block] -> Blocks) arbitrary - -instance Arbitrary Inline where - arbitrary = resize 3 $ arbInline 2 - -arbInlines :: Int -> Gen [Inline] -arbInlines n = listOf1 (arbInline n) `suchThat` (not . startsWithSpace) - where startsWithSpace (Space:_) = True - startsWithSpace _ = False - --- restrict to 3 levels of nesting max; otherwise we get --- bogged down in indefinitely large structures -arbInline :: Int -> Gen Inline -arbInline n = frequency $ [ (60, liftM Str realString) - , (60, return Space) - , (10, liftM2 Code arbAttr realString) - , (5, elements [ RawInline "html" "<a id=\"eek\">" - , RawInline "latex" "\\my{command}" ]) - ] ++ [ x | x <- nesters, n > 1] - where nesters = [ (10, liftM Emph $ arbInlines (n-1)) - , (10, liftM Strong $ arbInlines (n-1)) - , (10, liftM Strikeout $ arbInlines (n-1)) - , (10, liftM Superscript $ arbInlines (n-1)) - , (10, liftM Subscript $ arbInlines (n-1)) --- , (10, liftM SmallCaps $ arbInlines (n-1)) - , (10, do x1 <- arbitrary - x2 <- arbInlines (n-1) - return $ Quoted x1 x2) - , (10, do x1 <- arbitrary - x2 <- realString - return $ Math x1 x2) - , (10, do x1 <- arbInlines (n-1) - x3 <- realString - x2 <- liftM escapeURI realString - return $ Link x1 (x2,x3)) - , (10, do x1 <- arbInlines (n-1) - x3 <- realString - x2 <- liftM escapeURI realString - return $ Image x1 (x2,x3)) - , (2, liftM Note $ resize 3 $ listOf1 $ arbBlock (n-1)) - ] - -instance Arbitrary Block where - arbitrary = resize 3 $ arbBlock 2 - -arbBlock :: Int -> Gen Block -arbBlock n = frequency $ [ (10, liftM Plain $ arbInlines (n-1)) - , (15, liftM Para $ arbInlines (n-1)) - , (5, liftM2 CodeBlock arbAttr realString) - , (2, elements [ RawBlock "html" - "<div>\n*&*\n</div>" - , RawBlock "latex" - "\\begin[opt]{env}\nhi\n{\\end{env}" - ]) - , (5, do x1 <- choose (1 :: Int, 6) - x2 <- arbInlines (n-1) - return (Header x1 x2)) - , (2, return HorizontalRule) - ] ++ [x | x <- nesters, n > 0] - where nesters = [ (5, liftM BlockQuote $ listOf1 $ arbBlock (n-1)) - , (5, do x2 <- arbitrary - x3 <- arbitrary - x1 <- arbitrary `suchThat` (> 0) - x4 <- listOf1 $ listOf1 $ arbBlock (n-1) - return $ OrderedList (x1,x2,x3) x4 ) - , (5, liftM BulletList $ (listOf1 $ listOf1 $ arbBlock (n-1))) - , (5, do items <- listOf1 $ do - x1 <- listOf1 $ listOf1 $ arbBlock (n-1) - x2 <- arbInlines (n-1) - return (x2,x1) - return $ DefinitionList items) - , (2, do rs <- choose (1 :: Int, 4) - cs <- choose (1 :: Int, 4) - x1 <- arbInlines (n-1) - x2 <- vector cs - x3 <- vectorOf cs $ elements [0, 0.25] - x4 <- vectorOf cs $ listOf $ arbBlock (n-1) - x5 <- vectorOf rs $ vectorOf cs - $ listOf $ arbBlock (n-1) - return (Table x1 x2 x3 x4 x5)) - ] - -instance Arbitrary Pandoc where - arbitrary = resize 8 $ liftM normalize - $ liftM2 Pandoc arbitrary arbitrary - -{- -instance Arbitrary CitationMode where - arbitrary - = do x <- choose (0 :: Int, 2) - case x of - 0 -> return AuthorInText - 1 -> return SuppressAuthor - 2 -> return NormalCitation - _ -> error "FATAL ERROR: Arbitrary instance, logic bug" - -instance Arbitrary Citation where - arbitrary - = do x1 <- liftM (filter (`notElem` ",;]@ \t\n")) arbitrary - x2 <- arbitrary - x3 <- arbitrary - x4 <- arbitrary - x5 <- arbitrary - x6 <- arbitrary - return (Citation x1 x2 x3 x4 x5 x6) --} - -instance Arbitrary MathType where - arbitrary - = do x <- choose (0 :: Int, 1) - case x of - 0 -> return DisplayMath - 1 -> return InlineMath - _ -> error "FATAL ERROR: Arbitrary instance, logic bug" - -instance Arbitrary QuoteType where - arbitrary - = do x <- choose (0 :: Int, 1) - case x of - 0 -> return SingleQuote - 1 -> return DoubleQuote - _ -> error "FATAL ERROR: Arbitrary instance, logic bug" - -instance Arbitrary Meta where - arbitrary - = do x1 <- arbitrary - x2 <- liftM (filter (not . null)) arbitrary - x3 <- arbitrary - return (Meta x1 x2 x3) - -instance Arbitrary Alignment where - arbitrary - = do x <- choose (0 :: Int, 3) - case x of - 0 -> return AlignLeft - 1 -> return AlignRight - 2 -> return AlignCenter - 3 -> return AlignDefault - _ -> error "FATAL ERROR: Arbitrary instance, logic bug" - -instance Arbitrary ListNumberStyle where - arbitrary - = do x <- choose (0 :: Int, 6) - case x of - 0 -> return DefaultStyle - 1 -> return Example - 2 -> return Decimal - 3 -> return LowerRoman - 4 -> return UpperRoman - 5 -> return LowerAlpha - 6 -> return UpperAlpha - _ -> error "FATAL ERROR: Arbitrary instance, logic bug" - -instance Arbitrary ListNumberDelim where - arbitrary - = do x <- choose (0 :: Int, 3) - case x of - 0 -> return DefaultDelim - 1 -> return Period - 2 -> return OneParen - 3 -> return TwoParens - _ -> error "FATAL ERROR: Arbitrary instance, logic bug" - diff --git a/src/Tests/Helpers.hs b/src/Tests/Helpers.hs deleted file mode 100644 index 66879efed..000000000 --- a/src/Tests/Helpers.hs +++ /dev/null @@ -1,114 +0,0 @@ -{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, TemplateHaskell #-} --- Utility functions for the test suite. - -module Tests.Helpers ( lit - , file - , test - , (=?>) - , property - , ToString(..) - , ToPandoc(..) - ) - where - -import Text.Pandoc.Definition -import Text.Pandoc.Builder (Inlines, Blocks, doc, plain) -import Test.Framework -import Test.Framework.Providers.HUnit -import Test.Framework.Providers.QuickCheck2 -import Test.HUnit (assertBool) -import Text.Pandoc.Shared (normalize, defaultWriterOptions, - WriterOptions(..), removeTrailingSpace) -import Text.Pandoc.Writers.Native (writeNative) -import Language.Haskell.TH.Quote (QuasiQuoter(..)) -import Language.Haskell.TH.Syntax (Q, runIO) -import qualified Test.QuickCheck.Property as QP -import System.Console.ANSI -import Data.Algorithm.Diff - -lit :: QuasiQuoter -lit = QuasiQuoter { - quoteExp = (\a -> let b = rnl a in [|b|]) . filter (/= '\r') - , quotePat = error "Cannot use lit as a pattern" - } - where rnl ('\n':xs) = xs - rnl xs = xs - -file :: QuasiQuoter -file = quoteFile lit - --- adapted from TH 2.5 code -quoteFile :: QuasiQuoter -> QuasiQuoter -quoteFile (QuasiQuoter { quoteExp = qe, quotePat = qp }) = - QuasiQuoter { quoteExp = get qe, quotePat = get qp } - where - get :: (String -> Q a) -> String -> Q a - get old_quoter file_name = do { file_cts <- runIO (readFile file_name) - ; old_quoter file_cts } - -test :: (ToString a, ToString b, ToString c) - => (a -> b) -- ^ function to test - -> String -- ^ name of test case - -> (a, c) -- ^ (input, expected value) - -> Test -test fn name (input, expected) = - testCase name $ assertBool msg (actual' == expected') - where msg = nl ++ dashes "input" ++ nl ++ input' ++ nl ++ - dashes "expected" ++ nl ++ expected'' ++ - dashes "got" ++ nl ++ actual'' ++ - dashes "" - nl = "\n" - input' = toString input - actual' = toString $ fn input - expected' = toString expected - diff = getDiff (lines expected') (lines actual') - expected'' = unlines $ map vividize $ filter (\(d,_) -> d /= S) diff - actual'' = unlines $ map vividize $ filter (\(d,_) -> d /= F) diff - dashes "" = replicate 72 '-' - dashes x = replicate (72 - length x - 5) '-' ++ " " ++ x ++ " ---" - -vividize :: (DI,String) -> String -vividize (B,s) = s -vividize (F,s) = s -vividize (S,s) = setSGRCode [SetColor Background Dull Red - , SetColor Foreground Vivid White] ++ s - ++ setSGRCode [Reset] - -property :: QP.Testable a => TestName -> a -> Test -property = testProperty - -infix 5 =?> -(=?>) :: a -> b -> (a,b) -x =?> y = (x, y) - -class ToString a where - toString :: a -> String - -instance ToString Pandoc where - toString d = writeNative defaultWriterOptions{ writerStandalone = s } - $ toPandoc d - where s = case d of - (Pandoc (Meta [] [] []) _) -> False - _ -> True - -instance ToString Blocks where - toString = writeNative defaultWriterOptions . toPandoc - -instance ToString Inlines where - toString = removeTrailingSpace . writeNative defaultWriterOptions . - toPandoc - -instance ToString String where - toString = id - -class ToPandoc a where - toPandoc :: a -> Pandoc - -instance ToPandoc Pandoc where - toPandoc = normalize - -instance ToPandoc Blocks where - toPandoc = normalize . doc - -instance ToPandoc Inlines where - toPandoc = normalize . doc . plain diff --git a/src/Tests/Old.hs b/src/Tests/Old.hs deleted file mode 100644 index 67eb51573..000000000 --- a/src/Tests/Old.hs +++ /dev/null @@ -1,202 +0,0 @@ -module Tests.Old (tests) where - -import Test.Framework (testGroup, Test ) -import Test.Framework.Providers.HUnit -import Test.HUnit ( assertBool ) - -import System.IO ( openTempFile, stderr ) -import System.Process ( runProcess, waitForProcess ) -import System.FilePath ( (</>), (<.>) ) -import System.Directory -import System.Exit -import Data.Algorithm.Diff -import Text.Pandoc.Shared ( normalize, defaultWriterOptions ) -import Text.Pandoc.Writers.Native ( writeNative ) -import Text.Pandoc.Readers.Native ( readNative ) -import Prelude hiding ( readFile ) -import qualified Data.ByteString.Lazy as B -import Data.ByteString.Lazy.UTF8 (toString) -import Text.Printf - -readFileUTF8 :: FilePath -> IO String -readFileUTF8 f = B.readFile f >>= return . toString - -pandocPath :: FilePath -pandocPath = ".." </> "dist" </> "build" </> "pandoc" </> "pandoc" - -data TestResult = TestPassed - | TestError ExitCode - | TestFailed String FilePath [(DI, String)] - deriving (Eq) - -instance Show TestResult where - show TestPassed = "PASSED" - show (TestError ec) = "ERROR " ++ show ec - show (TestFailed cmd file d) = '\n' : dash ++ - "\n--- " ++ file ++ - "\n+++ " ++ cmd ++ "\n" ++ showDiff (1,1) d ++ - dash - where dash = replicate 72 '-' - -showDiff :: (Int,Int) -> [(DI, String)] -> String -showDiff _ [] = "" -showDiff (l,r) ((F, ln) : ds) = - printf "+%4d " l ++ ln ++ "\n" ++ showDiff (l+1,r) ds -showDiff (l,r) ((S, ln) : ds) = - printf "-%4d " r ++ ln ++ "\n" ++ showDiff (l,r+1) ds -showDiff (l,r) ((B, _ ) : ds) = - showDiff (l+1,r+1) ds - -tests :: [Test] -tests = [ testGroup "markdown" - [ testGroup "writer" - $ writerTests "markdown" ++ lhsWriterTests "markdown" - , testGroup "reader" - [ test "basic" ["-r", "markdown", "-w", "native", "-s", "-S"] - "testsuite.txt" "testsuite.native" - , test "tables" ["-r", "markdown", "-w", "native", "--columns=80"] - "tables.txt" "tables.native" - , test "more" ["-r", "markdown", "-w", "native", "-S"] - "markdown-reader-more.txt" "markdown-reader-more.native" - , lhsReaderTest "markdown+lhs" - ] - , testGroup "citations" markdownCitationTests - ] - , testGroup "rst" - [ testGroup "writer" (writerTests "rst" ++ lhsWriterTests "rst") - , testGroup "reader" - [ test "basic" ["-r", "rst", "-w", "native", - "-s", "-S", "--columns=80"] "rst-reader.rst" "rst-reader.native" - , test "tables" ["-r", "rst", "-w", "native", "--columns=80"] - "tables.rst" "tables-rstsubset.native" - , lhsReaderTest "rst+lhs" - ] - ] - , testGroup "latex" - [ testGroup "writer" (writerTests "latex" ++ lhsWriterTests "latex") - , testGroup "reader" - [ test "basic" ["-r", "latex", "-w", "native", "-s", "-R"] - "latex-reader.latex" "latex-reader.native" - , lhsReaderTest "latex+lhs" - ] - ] - , testGroup "html" - [ testGroup "writer" (writerTests "html" ++ lhsWriterTests "html") - , test "reader" ["-r", "html", "-w", "native", "-s"] - "html-reader.html" "html-reader.native" - ] - , testGroup "s5" - [ s5WriterTest "basic" ["-s"] "s5" - , s5WriterTest "fancy" ["-s","-m","-i"] "s5" - , s5WriterTest "fragment" [] "html" - , s5WriterTest "inserts" ["-s", "-H", "insert", - "-B", "insert", "-A", "insert", "-c", "main.css"] "html" - ] - , testGroup "textile" - [ testGroup "writer" $ writerTests "textile" - , test "reader" ["-r", "textile", "-w", "native", "-s"] - "textile-reader.textile" "textile-reader.native" - ] - , testGroup "docbook" - [ testGroup "writer" $ writerTests "docbook" - , test "reader" ["-r", "docbook", "-w", "native", "-s"] - "docbook-reader.docbook" "docbook-reader.native" - ] - , testGroup "native" - [ testGroup "writer" $ writerTests "native" - , test "reader" ["-r", "native", "-w", "native", "-s"] - "testsuite.native" "testsuite.native" - ] - , testGroup "other writers" $ map (\f -> testGroup f $ writerTests f) - [ "opendocument" , "context" , "texinfo" - , "man" , "plain" , "mediawiki", "rtf", "org", "asciidoc" - ] - ] - --- makes sure file is fully closed after reading -readFile' :: FilePath -> IO String -readFile' f = do s <- readFileUTF8 f - return $! (length s `seq` s) - -lhsWriterTests :: String -> [Test] -lhsWriterTests format - = [ t "lhs to normal" format - , t "lhs to lhs" (format ++ "+lhs") - ] - where - t n f = test n ["--columns=78", "-r", "native", "-s", "-w", f] - "lhs-test.native" ("lhs-test" <.> f) - -lhsReaderTest :: String -> Test -lhsReaderTest format = - testWithNormalize normalizer "lhs" ["-r", format, "-w", "native"] - ("lhs-test" <.> format) "lhs-test.native" - where normalizer = writeNative defaultWriterOptions . normalize . readNative - -writerTests :: String -> [Test] -writerTests format - = [ test "basic" (opts ++ ["-s"]) "testsuite.native" ("writer" <.> format) - , test "tables" opts "tables.native" ("tables" <.> format) - ] - where - opts = ["-r", "native", "-w", format, "--columns=78"] - -s5WriterTest :: String -> [String] -> String -> Test -s5WriterTest modifier opts format - = test (format ++ " writer (" ++ modifier ++ ")") - (["-r", "native", "-w", format] ++ opts) - "s5.native" ("s5." ++ modifier <.> "html") - -markdownCitationTests :: [Test] -markdownCitationTests - = map styleToTest ["chicago-author-date","ieee","mhra"] - ++ [test "natbib" wopts "markdown-citations.txt" - "markdown-citations.txt"] - where - ropts = ["-r", "markdown", "-w", "markdown", "--bibliography", - "biblio.bib", "--no-wrap"] - wopts = ropts ++ ["--natbib"] - styleToTest style = test style (ropts ++ ["--csl", style ++ ".csl"]) - "markdown-citations.txt" - ("markdown-citations." ++ style ++ ".txt") - --- | Run a test without normalize function, return True if test passed. -test :: String -- ^ Title of test - -> [String] -- ^ Options to pass to pandoc - -> String -- ^ Input filepath - -> FilePath -- ^ Norm (for test results) filepath - -> Test -test = testWithNormalize id - --- | Run a test with normalize function, return True if test passed. -testWithNormalize :: (String -> String) -- ^ Normalize function for output - -> String -- ^ Title of test - -> [String] -- ^ Options to pass to pandoc - -> String -- ^ Input filepath - -> FilePath -- ^ Norm (for test results) filepath - -> Test -testWithNormalize normalizer testname opts inp norm = testCase testname $ do - (outputPath, hOut) <- openTempFile "" "pandoc-test" - let inpPath = inp - let normPath = norm - let options = ["--data-dir", ".."] ++ [inpPath] ++ opts - let cmd = pandocPath ++ " " ++ unwords options - ph <- runProcess pandocPath options Nothing - (Just [("LANG","en_US.UTF-8"),("HOME", "./")]) Nothing (Just hOut) - (Just stderr) - ec <- waitForProcess ph - result <- if ec == ExitSuccess - then do - -- filter \r so the tests will work on Windows machines - outputContents <- readFile' outputPath >>= - return . filter (/='\r') . normalizer - normContents <- readFile' normPath >>= - return . filter (/='\r') . normalizer - if outputContents == normContents - then return TestPassed - else return - $ TestFailed cmd normPath - $ getDiff (lines outputContents) (lines normContents) - else return $ TestError ec - removeFile outputPath - assertBool (show result) (result == TestPassed) diff --git a/src/Tests/Readers/LaTeX.hs b/src/Tests/Readers/LaTeX.hs deleted file mode 100644 index d60026b20..000000000 --- a/src/Tests/Readers/LaTeX.hs +++ /dev/null @@ -1,169 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -module Tests.Readers.LaTeX (tests) where - -import Text.Pandoc.Definition -import Test.Framework -import Tests.Helpers -import Tests.Arbitrary() -import Text.Pandoc.Builder -import Text.Pandoc - -latex :: String -> Pandoc -latex = readLaTeX defaultParserState - -infix 4 =: -(=:) :: ToString c - => String -> (String, c) -> Test -(=:) = test latex - -tests :: [Test] -tests = [ testGroup "basic" - [ "simple" =: - "word" =?> para "word" - , "space" =: - "some text" =?> para ("some text") - , "emphasized" =: - "\\emph{emphasized}" =?> para (emph "emphasized") - ] - - , testGroup "headers" - [ "level 1" =: - "\\section{header}" =?> header 1 "header" - , "level 2" =: - "\\subsection{header}" =?> header 2 "header" - , "level 3" =: - "\\subsubsection{header}" =?> header 3 "header" - , "emph" =: - "\\section{text \\emph{emph}}" =?> - header 1 ("text" <> space <> emph "emph") - , "link" =: - "\\section{text \\href{/url}{link}}" =?> - header 1 ("text" <> space <> link "/url" "" "link") - ] - - , testGroup "math" - [ "escaped $" =: - "$x=\\$4$" =?> para (math "x=\\$4") - ] - - , testGroup "space and comments" - [ "blank lines + space at beginning" =: - "\n \n hi" =?> para "hi" - , "blank lines + space + comments" =: - "% my comment\n\n \n % another\n\nhi" =?> para "hi" - , "comment in paragraph" =: - "hi % this is a comment\nthere\n" =?> para "hi there" - ] - - , testGroup "citations" - [ natbibCitations - , biblatexCitations - ] - ] - -baseCitation :: Citation -baseCitation = Citation{ citationId = "item1" - , citationPrefix = [] - , citationSuffix = [] - , citationMode = AuthorInText - , citationNoteNum = 0 - , citationHash = 0 } - -rt :: String -> Inlines -rt = rawInline "latex" - -natbibCitations :: Test -natbibCitations = testGroup "natbib" - [ "citet" =: "\\citet{item1}" - =?> para (cite [baseCitation] (rt "\\citet{item1}")) - , "suffix" =: "\\citet[p.~30]{item1}" - =?> para - (cite [baseCitation{ citationSuffix = toList $ text ", p.\160\&30" }] (rt "\\citet[p.~30]{item1}")) - , "suffix long" =: "\\citet[p.~30, with suffix]{item1}" - =?> para (cite [baseCitation{ citationSuffix = - toList $ text ", p.\160\&30, with suffix" }] (rt "\\citet[p.~30, with suffix]{item1}")) - , "multiple" =: "\\citeauthor{item1} \\citetext{\\citeyear{item1}; \\citeyear[p.~30]{item2}; \\citealp[see also][]{item3}}" - =?> para (cite [baseCitation{ citationMode = AuthorInText } - ,baseCitation{ citationMode = SuppressAuthor - , citationSuffix = [Str ",",Space,Str "p.\160\&30"] - , citationId = "item2" } - ,baseCitation{ citationId = "item3" - , citationPrefix = [Str "see",Space,Str "also"] - , citationMode = NormalCitation } - ] (rt "\\citetext{\\citeyear{item1}; \\citeyear[p.~30]{item2}; \\citealp[see also][]{item3}}")) - , "group" =: "\\citetext{\\citealp[see][p.~34--35]{item1}; \\citealp[also][chap. 3]{item3}}" - =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationPrefix = [Str "see"] - , citationSuffix = [Str ",",Space,Str "p.\160\&34\8211\&35"] } - ,baseCitation{ citationMode = NormalCitation - , citationId = "item3" - , citationPrefix = [Str "also"] - , citationSuffix = [Str ",",Space,Str "chap.",Space,Str "3"] } - ] (rt "\\citetext{\\citealp[see][p.~34--35]{item1}; \\citealp[also][chap. 3]{item3}}")) - , "suffix and locator" =: "\\citep[pp.~33, 35--37, and nowhere else]{item1}" - =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = [Str ",",Space,Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\citep[pp.~33, 35--37, and nowhere else]{item1}")) - , "suffix only" =: "\\citep[and nowhere else]{item1}" - =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = toList $ text ", and nowhere else" }] (rt "\\citep[and nowhere else]{item1}")) - , "no author" =: "\\citeyearpar{item1}, and now Doe with a locator \\citeyearpar[p.~44]{item2}" - =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] (rt "\\citeyearpar{item1}") <> - text ", and now Doe with a locator " <> - cite [baseCitation{ citationMode = SuppressAuthor - , citationSuffix = [Str ",",Space,Str "p.\160\&44"] - , citationId = "item2" }] (rt "\\citeyearpar[p.~44]{item2}")) - , "markup" =: "\\citep[\\emph{see}][p. \\textbf{32}]{item1}" - =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationPrefix = [Emph [Str "see"]] - , citationSuffix = [Str ",",Space,Str "p.",Space, - Strong [Str "32"]] }] (rt "\\citep[\\emph{see}][p. \\textbf{32}]{item1}")) - ] - -biblatexCitations :: Test -biblatexCitations = testGroup "biblatex" - [ "textcite" =: "\\textcite{item1}" - =?> para (cite [baseCitation] (rt "\\textcite{item1}")) - , "suffix" =: "\\textcite[p.~30]{item1}" - =?> para - (cite [baseCitation{ citationSuffix = toList $ text ", p.\160\&30" }] (rt "\\textcite[p.~30]{item1}")) - , "suffix long" =: "\\textcite[p.~30, with suffix]{item1}" - =?> para (cite [baseCitation{ citationSuffix = - toList $ text ", p.\160\&30, with suffix" }] (rt "\\textcite[p.~30, with suffix]{item1}")) - , "multiple" =: "\\textcites{item1}[p.~30]{item2}[see also][]{item3}" - =?> para (cite [baseCitation{ citationMode = AuthorInText } - ,baseCitation{ citationMode = NormalCitation - , citationSuffix = [Str ",",Space,Str "p.\160\&30"] - , citationId = "item2" } - ,baseCitation{ citationId = "item3" - , citationPrefix = [Str "see",Space,Str "also"] - , citationMode = NormalCitation } - ] (rt "\\textcites{item1}[p.~30]{item2}[see also][]{item3}")) - , "group" =: "\\autocites[see][p.~34--35]{item1}[also][chap. 3]{item3}" - =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationPrefix = [Str "see"] - , citationSuffix = [Str ",",Space,Str "p.\160\&34\8211\&35"] } - ,baseCitation{ citationMode = NormalCitation - , citationId = "item3" - , citationPrefix = [Str "also"] - , citationSuffix = [Str ",",Space,Str "chap.",Space,Str "3"] } - ] (rt "\\autocites[see][p.~34--35]{item1}[also][chap. 3]{item3}")) - , "suffix and locator" =: "\\autocite[pp.~33, 35--37, and nowhere else]{item1}" - =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = [Str ",",Space,Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\autocite[pp.~33, 35--37, and nowhere else]{item1}")) - , "suffix only" =: "\\autocite[and nowhere else]{item1}" - =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = toList $ text ", and nowhere else" }] (rt "\\autocite[and nowhere else]{item1}")) - , "no author" =: "\\autocite*{item1}, and now Doe with a locator \\autocite*[p.~44]{item2}" - =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] (rt "\\autocite*{item1}") <> - text ", and now Doe with a locator " <> - cite [baseCitation{ citationMode = SuppressAuthor - , citationSuffix = [Str ",",Space,Str "p.\160\&44"] - , citationId = "item2" }] (rt "\\autocite*[p.~44]{item2}")) - , "markup" =: "\\autocite[\\emph{see}][p. \\textbf{32}]{item1}" - =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationPrefix = [Emph [Str "see"]] - , citationSuffix = [Str ",",Space,Str "p.",Space, - Strong [Str "32"]] }] (rt "\\autocite[\\emph{see}][p. \\textbf{32}]{item1}")) - , "parencite" =: "\\parencite{item1}" - =?> para (cite [baseCitation{ citationMode = NormalCitation }] (rt "\\parencite{item1}")) - ] diff --git a/src/Tests/Readers/Markdown.hs b/src/Tests/Readers/Markdown.hs deleted file mode 100644 index 5ad974adf..000000000 --- a/src/Tests/Readers/Markdown.hs +++ /dev/null @@ -1,107 +0,0 @@ -{-# LANGUAGE OverloadedStrings, QuasiQuotes #-} -module Tests.Readers.Markdown (tests) where - -import Text.Pandoc.Definition -import Test.Framework -import Tests.Helpers -import Tests.Arbitrary() -import Text.Pandoc.Builder --- import Text.Pandoc.Shared ( normalize ) -import Text.Pandoc - -markdown :: String -> Pandoc -markdown = readMarkdown defaultParserState{ stateStandalone = True } - -markdownSmart :: String -> Pandoc -markdownSmart = readMarkdown defaultParserState{ stateSmart = True } - -infix 4 =: -(=:) :: ToString c - => String -> (String, c) -> Test -(=:) = test markdown - -{- -p_markdown_round_trip :: Block -> Bool -p_markdown_round_trip b = matches d' d'' - where d' = normalize $ Pandoc (Meta [] [] []) [b] - d'' = normalize - $ readMarkdown defaultParserState{ stateSmart = True } - $ writeMarkdown defaultWriterOptions d' - matches (Pandoc _ [Plain []]) (Pandoc _ []) = True - matches (Pandoc _ [Para []]) (Pandoc _ []) = True - matches (Pandoc _ [Plain xs]) (Pandoc _ [Para xs']) = xs == xs' - matches x y = x == y --} - -tests :: [Test] -tests = [ testGroup "inline code" - [ "with attribute" =: - "`document.write(\"Hello\");`{.javascript}" - =?> para - (codeWith ("",["javascript"],[]) "document.write(\"Hello\");") - , "with attribute space" =: - "`*` {.haskell .special x=\"7\"}" - =?> para (codeWith ("",["haskell","special"],[("x","7")]) "*") - ] - , testGroup "backslash escapes" - [ "in URL" =: - "[hi](/there\\))" - =?> para (link "/there)" "" "hi") - , "in title" =: - "[hi](/there \"a\\\"a\")" - =?> para (link "/there" "a\"a" "hi") - , "in reference link title" =: - "[hi]\n\n[hi]: /there (a\\)a)" - =?> para (link "/there" "a)a" "hi") - , "in reference link URL" =: - "[hi]\n\n[hi]: /there\\.0" - =?> para (link "/there.0" "" "hi") - ] - , testGroup "smart punctuation" - [ test markdownSmart "quote before ellipses" - ("'...hi'" - =?> para (singleQuoted ("…hi"))) - , test markdownSmart "apostrophe before emph" - ("D'oh! A l'*aide*!" - =?> para ("D’oh! A l’" <> emph "aide" <> "!")) - , test markdownSmart "apostrophe in French" - ("À l'arrivée de la guerre, le thème de l'«impossibilité du socialisme»" - =?> para ("À l’arrivée de la guerre, le thème de l’«impossibilité du socialisme»")) - ] - , testGroup "mixed emphasis and strong" - [ "emph and strong emph alternating" =: - "*xxx* ***xxx*** xxx\n*xxx* ***xxx*** xxx" - =?> para (emph "xxx" <> space <> strong (emph "xxx") <> - space <> "xxx" <> space <> - emph "xxx" <> space <> strong (emph "xxx") <> - space <> "xxx") - , "emph with spaced strong" =: - "*x **xx** x*" - =?> para (emph ("x" <> space <> strong "xx" <> space <> "x")) - ] - , testGroup "footnotes" - [ "indent followed by newline and flush-left text" =: - "[^1]\n\n[^1]: my note\n\n \nnot in note\n" - =?> para (note (para "my note")) <> para "not in note" - , "indent followed by newline and indented text" =: - "[^1]\n\n[^1]: my note\n \n in note\n" - =?> para (note (para "my note" <> para "in note")) - , "recursive note" =: - "[^1]\n\n[^1]: See [^1]\n" - =?> para (note (para "See [^1]")) - ] - , testGroup "lhs" - [ test (readMarkdown defaultParserState{stateLiterateHaskell = True}) - "inverse bird tracks and html" $ - "> a\n\n< b\n\n<div>\n" - =?> codeBlockWith ("",["sourceCode","literate","haskell"],[]) "a" - <> - codeBlockWith ("",["sourceCode","haskell"],[]) "b" - <> - rawBlock "html" "<div>\n\n" - ] --- the round-trip properties frequently fail --- , testGroup "round trip" --- [ property "p_markdown_round_trip" p_markdown_round_trip --- ] - ] diff --git a/src/Tests/Readers/RST.hs b/src/Tests/Readers/RST.hs deleted file mode 100644 index 3269092a6..000000000 --- a/src/Tests/Readers/RST.hs +++ /dev/null @@ -1,60 +0,0 @@ -{-# LANGUAGE OverloadedStrings, QuasiQuotes #-} -module Tests.Readers.RST (tests) where - -import Text.Pandoc.Definition -import Test.Framework -import Tests.Helpers -import Tests.Arbitrary() -import Text.Pandoc.Builder -import Text.Pandoc - -rst :: String -> Pandoc -rst = readRST defaultParserState{ stateStandalone = True } - -infix 4 =: -(=:) :: ToString c - => String -> (String, c) -> Test -(=:) = test rst - -tests :: [Test] -tests = [ "line block with blank line" =: - "| a\n|\n| b" =?> para (str "a" <> linebreak <> - linebreak <> str " " <> str "b") - , "field list" =: - [_LIT| -:Hostname: media08 -:IP address: 10.0.0.19 -:Size: 3ru -:Date: 2001-08-16 -:Version: 1 -:Authors: - Me - - Myself - - I -:Indentation: Since the field marker may be quite long, the second - and subsequent lines of the field body do not have to line up - with the first line, but they must be indented relative to the - field name marker, and they must line up with each other. -:Parameter i: integer -:Final: item - on two lines -|] =?> ( setAuthors ["Me","Myself","I"] - $ setDate "2001-08-16" - $ doc - $ definitionList [ (str "Hostname", [para "media08"]) - , (str "IP address", [para "10.0.0.19"]) - , (str "Size", [para "3ru"]) - , (str "Version", [para "1"]) - , (str "Indentation", [para "Since the field marker may be quite long, the second and subsequent lines of the field body do not have to line up with the first line, but they must be indented relative to the field name marker, and they must line up with each other."]) - , (str "Parameter i", [para "integer"]) - , (str "Final", [para "item on two lines"]) - ]) - , "URLs with following punctuation" =: - ("http://google.com, http://yahoo.com; http://foo.bar.baz.\n" ++ - "http://foo.bar/baz_(bam) (http://foo.bar)") =?> - para (link "http://google.com" "" "http://google.com" <> ", " <> - link "http://yahoo.com" "" "http://yahoo.com" <> "; " <> - link "http://foo.bar.baz" "" "http://foo.bar.baz" <> ". " <> - link "http://foo.bar/baz_(bam)" "" "http://foo.bar/baz_(bam)" - <> " (" <> link "http://foo.bar" "" "http://foo.bar" <> ")") - ] - diff --git a/src/Tests/Shared.hs b/src/Tests/Shared.hs deleted file mode 100644 index f4bf13da4..000000000 --- a/src/Tests/Shared.hs +++ /dev/null @@ -1,26 +0,0 @@ -module Tests.Shared (tests) where - -import Text.Pandoc.Definition -import Text.Pandoc.Shared -import Test.Framework -import Tests.Helpers -import Tests.Arbitrary() - -tests :: [Test] -tests = [ testGroup "normalize" - [ property "p_normalize_blocks_rt" p_normalize_blocks_rt - , property "p_normalize_inlines_rt" p_normalize_inlines_rt - , property "p_normalize_no_trailing_spaces" - p_normalize_no_trailing_spaces - ] - ] - -p_normalize_blocks_rt :: [Block] -> Bool -p_normalize_blocks_rt bs = normalize bs == normalize (normalize bs) - -p_normalize_inlines_rt :: [Inline] -> Bool -p_normalize_inlines_rt ils = normalize ils == normalize (normalize ils) - -p_normalize_no_trailing_spaces :: [Inline] -> Bool -p_normalize_no_trailing_spaces ils = null ils' || last ils' /= Space - where ils' = normalize $ ils ++ [Space] diff --git a/src/Tests/Writers/ConTeXt.hs b/src/Tests/Writers/ConTeXt.hs deleted file mode 100644 index beb6411f0..000000000 --- a/src/Tests/Writers/ConTeXt.hs +++ /dev/null @@ -1,70 +0,0 @@ -{-# LANGUAGE OverloadedStrings, QuasiQuotes #-} -module Tests.Writers.ConTeXt (tests) where - -import Test.Framework -import Text.Pandoc.Builder -import Text.Pandoc -import Tests.Helpers -import Tests.Arbitrary() - -context :: (ToString a, ToPandoc a) => a -> String -context = writeConTeXt defaultWriterOptions . toPandoc - -context' :: (ToString a, ToPandoc a) => a -> String -context' = writeConTeXt defaultWriterOptions{ writerWrapText = False } - . toPandoc - -{- - "my test" =: X =?> Y - -is shorthand for - - test context "my test" $ X =?> Y - -which is in turn shorthand for - - test context "my test" (X,Y) --} - -infix 4 =: -(=:) :: (ToString a, ToPandoc a) - => String -> (a, String) -> Test -(=:) = test context - -tests :: [Test] -tests = [ testGroup "inline code" - [ "with '}'" =: code "}" =?> "\\mono{\\}}" - , "without '}'" =: code "]" =?> "\\type{]}" - , property "code property" $ \s -> null s || - if '{' `elem` s || '}' `elem` s - then (context' $ code s) == "\\mono{" ++ - (context' $ str s) ++ "}" - else (context' $ code s) == "\\type{" ++ s ++ "}" - ] - , testGroup "headers" - [ "level 1" =: - header 1 "My header" =?> "\\section[my-header]{My header}" - ] - , testGroup "bullet lists" - [ "nested" =: - bulletList [plain (text "top") - ,bulletList [plain (text "next") - ,bulletList [plain (text "bot")]]] - =?> [_LIT| -\startitemize -\item - top -\item - \startitemize - \item - next - \item - \startitemize - \item - bot - \stopitemize - \stopitemize -\stopitemize|] - ] - ] - diff --git a/src/Tests/Writers/HTML.hs b/src/Tests/Writers/HTML.hs deleted file mode 100644 index 8561aa421..000000000 --- a/src/Tests/Writers/HTML.hs +++ /dev/null @@ -1,46 +0,0 @@ -{-# LANGUAGE OverloadedStrings, QuasiQuotes #-} -module Tests.Writers.HTML (tests) where - -import Test.Framework -import Text.Pandoc.Builder -import Text.Pandoc -import Tests.Helpers -import Tests.Arbitrary() -import Text.Pandoc.Highlighting (languages) -- null if no hl support - -html :: (ToString a, ToPandoc a) => a -> String -html = writeHtmlString defaultWriterOptions{ writerWrapText = False } . toPandoc - -{- - "my test" =: X =?> Y - -is shorthand for - - test html "my test" $ X =?> Y - -which is in turn shorthand for - - test html "my test" (X,Y) --} - -infix 4 =: -(=:) :: (ToString a, ToPandoc a) - => String -> (a, String) -> Test -(=:) = test html - -tests :: [Test] -tests = [ testGroup "inline code" - [ "basic" =: code "@&" =?> "<code>@&</code>" - , "haskell" =: codeWith ("",["haskell"],[]) ">>=" - =?> if null languages - then "<code class=\"haskell\">>>=</code>" - else "<code class=\"sourceCode haskell\"><span class=\"fu\">>>=</span></code>" - , "nolanguage" =: codeWith ("",["nolanguage"],[]) ">>=" - =?> "<code class=\"nolanguage\">>>=</code>" - ] - , testGroup "images" - [ "alt with formatting" =: - image "/url" "title" ("my " <> emph "image") - =?> "<img src=\"/url\" title=\"title\" alt=\"my image\" />" - ] - ] diff --git a/src/Tests/Writers/LaTeX.hs b/src/Tests/Writers/LaTeX.hs deleted file mode 100644 index 7987716f3..000000000 --- a/src/Tests/Writers/LaTeX.hs +++ /dev/null @@ -1,35 +0,0 @@ -{-# LANGUAGE OverloadedStrings, QuasiQuotes #-} -module Tests.Writers.LaTeX (tests) where - -import Test.Framework -import Text.Pandoc.Builder -import Text.Pandoc -import Tests.Helpers -import Tests.Arbitrary() - -latex :: (ToString a, ToPandoc a) => a -> String -latex = writeLaTeX defaultWriterOptions . toPandoc - -{- - "my test" =: X =?> Y - -is shorthand for - - test latex "my test" $ X =?> Y - -which is in turn shorthand for - - test latex "my test" (X,Y) --} - -infix 4 =: -(=:) :: (ToString a, ToPandoc a) - => String -> (a, String) -> Test -(=:) = test latex - -tests :: [Test] -tests = [ testGroup "code blocks" - [ "in footnotes" =: note (para "hi" <> codeBlock "hi") =?> - "\\footnote{hi\n\n\\begin{Verbatim}\nhi\n\\end{Verbatim}\n}" - ] - ] diff --git a/src/Tests/Writers/Markdown.hs b/src/Tests/Writers/Markdown.hs deleted file mode 100644 index d90dc83b1..000000000 --- a/src/Tests/Writers/Markdown.hs +++ /dev/null @@ -1,34 +0,0 @@ -{-# LANGUAGE OverloadedStrings, QuasiQuotes #-} -module Tests.Writers.Markdown (tests) where - -import Test.Framework -import Text.Pandoc.Builder -import Text.Pandoc -import Tests.Helpers -import Tests.Arbitrary() - -markdown :: (ToString a, ToPandoc a) => a -> String -markdown = writeMarkdown defaultWriterOptions . toPandoc - -{- - "my test" =: X =?> Y - -is shorthand for - - test markdown "my test" $ X =?> Y - -which is in turn shorthand for - - test markdown "my test" (X,Y) --} - -infix 4 =: -(=:) :: (ToString a, ToPandoc a) - => String -> (a, String) -> Test -(=:) = test markdown - -tests :: [Test] -tests = [ "indented code after list" - =: (orderedList [ para "one" <> para "two" ] <> codeBlock "test") - =?> "1. one\n\n two\n\n<!-- -->\n\n test" - ] diff --git a/src/Tests/Writers/Native.hs b/src/Tests/Writers/Native.hs deleted file mode 100644 index 19740e0f4..000000000 --- a/src/Tests/Writers/Native.hs +++ /dev/null @@ -1,21 +0,0 @@ -module Tests.Writers.Native (tests) where - -import Test.Framework -import Text.Pandoc.Builder -import Text.Pandoc -import Tests.Helpers -import Tests.Arbitrary() - -p_write_rt :: Pandoc -> Bool -p_write_rt d = - read (writeNative defaultWriterOptions{ writerStandalone = True } d) == d - -p_write_blocks_rt :: [Block] -> Bool -p_write_blocks_rt bs = length bs > 20 || - read (writeNative defaultWriterOptions (Pandoc (Meta [] [] []) bs)) == - bs - -tests :: [Test] -tests = [ property "p_write_rt" p_write_rt - , property "p_write_blocks_rt" p_write_blocks_rt - ] diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 432a5c2ba..33706816e 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -20,10 +20,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha + Stability : alpha Portability : portable This helper module exports the main writers, readers, and data @@ -43,9 +43,8 @@ inline links: > > markdownToRST :: String -> String > markdownToRST = -> (writeRST defaultWriterOptions {writerReferenceLinks = True}) . -> readMarkdown defaultParserState -> +> (writeRST def {writerReferenceLinks = True}) . readMarkdown def +> > main = getContents >>= putStrLn . markdownToRST Note: all of the readers assume that the input text has @'\n'@ @@ -55,11 +54,13 @@ you should remove @'\r'@ characters using @filter (/='\r')@. -} module Text.Pandoc - ( + ( -- * Definitions module Text.Pandoc.Definition -- * Generics , module Text.Pandoc.Generic + -- * Options + , module Text.Pandoc.Options -- * Lists of readers and writers , readers , writers @@ -71,15 +72,8 @@ module Text.Pandoc , readTextile , readDocBook , readNative - -- * Parser state used in readers - , ParserState (..) - , defaultParserState - , ParserContext (..) - , QuoteContext (..) - , KeyTable - , NoteTable - , HeaderType (..) -- * Writers: converting /from/ Pandoc format + , Writer (..) , writeNative , writeMarkdown , writePlain @@ -98,19 +92,16 @@ module Text.Pandoc , writeODT , writeDocx , writeEPUB + , writeFB2 , writeOrg , writeAsciiDoc - -- * Writer options used in writers - , WriterOptions (..) - , HTMLSlideVariant (..) - , HTMLMathMethod (..) - , CiteMethod (..) - , defaultWriterOptions -- * Rendering templates and default templates , module Text.Pandoc.Templates -- * Version , pandocVersion -- * Miscellaneous + , getReader + , getWriter , rtfEmbedImage , jsonFilter , ToJsonFilter(..) @@ -127,7 +118,7 @@ import Text.Pandoc.Readers.Textile import Text.Pandoc.Readers.Native import Text.Pandoc.Writers.Native import Text.Pandoc.Writers.Markdown -import Text.Pandoc.Writers.RST +import Text.Pandoc.Writers.RST import Text.Pandoc.Writers.LaTeX import Text.Pandoc.Writers.ConTeXt import Text.Pandoc.Writers.Texinfo @@ -135,85 +126,143 @@ import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.ODT import Text.Pandoc.Writers.Docx import Text.Pandoc.Writers.EPUB +import Text.Pandoc.Writers.FB2 import Text.Pandoc.Writers.Docbook import Text.Pandoc.Writers.OpenDocument import Text.Pandoc.Writers.Man -import Text.Pandoc.Writers.RTF +import Text.Pandoc.Writers.RTF import Text.Pandoc.Writers.MediaWiki import Text.Pandoc.Writers.Textile import Text.Pandoc.Writers.Org import Text.Pandoc.Writers.AsciiDoc import Text.Pandoc.Templates -import Text.Pandoc.Parsing -import Text.Pandoc.Shared +import Text.Pandoc.Options +import Text.Pandoc.Shared (safeRead) +import Data.ByteString.Lazy (ByteString) +import Data.List (intercalate) import Data.Version (showVersion) import Text.JSON.Generic +import Data.Set (Set) +import qualified Data.Set as Set +import Text.Parsec +import Text.Parsec.Error import Paths_pandoc (version) -- | Version number of pandoc library. pandocVersion :: String pandocVersion = showVersion version +parseFormatSpec :: String + -> Either ParseError (String, Set Extension -> Set Extension) +parseFormatSpec = parse formatSpec "" + where formatSpec = do + name <- formatName + extMods <- many extMod + return (name, foldl (.) id extMods) + formatName = many1 $ noneOf "-+" + extMod = do + polarity <- oneOf "-+" + name <- many $ noneOf "-+" + ext <- case safeRead ("Ext_" ++ name) of + Just n -> return n + Nothing + | name == "lhs" -> return Ext_literate_haskell + | otherwise -> fail $ "Unknown extension: " ++ name + return $ case polarity of + '-' -> Set.delete ext + _ -> Set.insert ext + -- | Association list of formats and readers. -readers :: [(String, ParserState -> String -> Pandoc)] +readers :: [(String, ReaderOptions -> String -> Pandoc)] readers = [("native" , \_ -> readNative) ,("json" , \_ -> decodeJSON) + ,("markdown_strict" , readMarkdown) ,("markdown" , readMarkdown) - ,("markdown+lhs" , \st -> - readMarkdown st{ stateLiterateHaskell = True}) ,("rst" , readRST) - ,("rst+lhs" , \st -> - readRST st{ stateLiterateHaskell = True}) ,("docbook" , readDocBook) - ,("textile" , readTextile) -- TODO : textile+lhs + ,("textile" , readTextile) -- TODO : textile+lhs ,("html" , readHtml) ,("latex" , readLaTeX) - ,("latex+lhs" , \st -> - readLaTeX st{ stateLiterateHaskell = True}) ] --- | Association list of formats and writers (omitting the --- binary writers, odt, docx, and epub). -writers :: [ ( String, WriterOptions -> Pandoc -> String ) ] -writers = [("native" , writeNative) - ,("json" , \_ -> encodeJSON) - ,("html" , writeHtmlString) - ,("html5" , \o -> - writeHtmlString o{ writerHtml5 = True }) - ,("html+lhs" , \o -> - writeHtmlString o{ writerLiterateHaskell = True }) - ,("html5+lhs" , \o -> - writeHtmlString o{ writerLiterateHaskell = True, - writerHtml5 = True }) - ,("s5" , writeHtmlString) - ,("slidy" , writeHtmlString) - ,("slideous" , writeHtmlString) - ,("dzslides" , writeHtmlString) - ,("docbook" , writeDocbook) - ,("opendocument" , writeOpenDocument) - ,("latex" , writeLaTeX) - ,("latex+lhs" , \o -> - writeLaTeX o{ writerLiterateHaskell = True }) - ,("beamer" , \o -> - writeLaTeX o{ writerBeamer = True }) - ,("beamer+lhs" , \o -> - writeLaTeX o{ writerBeamer = True, writerLiterateHaskell = True }) - ,("context" , writeConTeXt) - ,("texinfo" , writeTexinfo) - ,("man" , writeMan) - ,("markdown" , writeMarkdown) - ,("markdown+lhs" , \o -> - writeMarkdown o{ writerLiterateHaskell = True }) - ,("plain" , writePlain) - ,("rst" , writeRST) - ,("rst+lhs" , \o -> - writeRST o{ writerLiterateHaskell = True }) - ,("mediawiki" , writeMediaWiki) - ,("textile" , writeTextile) - ,("rtf" , writeRTF) - ,("org" , writeOrg) - ,("asciidoc" , writeAsciiDoc) - ] +data Writer = PureStringWriter (WriterOptions -> Pandoc -> String) + | IOStringWriter (WriterOptions -> Pandoc -> IO String) + | IOByteStringWriter (WriterOptions -> Pandoc -> IO ByteString) + +-- | Association list of formats and writers. +writers :: [ ( String, Writer ) ] +writers = [ + ("native" , PureStringWriter writeNative) + ,("json" , PureStringWriter $ \_ -> encodeJSON) + ,("docx" , IOByteStringWriter writeDocx) + ,("odt" , IOByteStringWriter writeODT) + ,("epub" , IOByteStringWriter writeEPUB) + ,("fb2" , IOStringWriter writeFB2) + ,("html" , PureStringWriter writeHtmlString) + ,("html5" , PureStringWriter $ \o -> + writeHtmlString o{ writerHtml5 = True }) + ,("s5" , PureStringWriter $ \o -> + writeHtmlString o{ writerSlideVariant = S5Slides + , writerTableOfContents = False }) + ,("slidy" , PureStringWriter $ \o -> + writeHtmlString o{ writerSlideVariant = SlidySlides }) + ,("slideous" , PureStringWriter $ \o -> + writeHtmlString o{ writerSlideVariant = SlideousSlides }) + ,("dzslides" , PureStringWriter $ \o -> + writeHtmlString o{ writerSlideVariant = DZSlides + , writerHtml5 = True }) + ,("docbook" , PureStringWriter writeDocbook) + ,("opendocument" , PureStringWriter writeOpenDocument) + ,("latex" , PureStringWriter writeLaTeX) + ,("beamer" , PureStringWriter $ \o -> + writeLaTeX o{ writerBeamer = True }) + ,("context" , PureStringWriter writeConTeXt) + ,("texinfo" , PureStringWriter writeTexinfo) + ,("man" , PureStringWriter writeMan) + ,("markdown" , PureStringWriter writeMarkdown) + ,("markdown_strict" , PureStringWriter writeMarkdown) + ,("plain" , PureStringWriter writePlain) + ,("rst" , PureStringWriter writeRST) + ,("mediawiki" , PureStringWriter writeMediaWiki) + ,("textile" , PureStringWriter writeTextile) + ,("rtf" , PureStringWriter writeRTF) + ,("org" , PureStringWriter writeOrg) + ,("asciidoc" , PureStringWriter writeAsciiDoc) + ] + +getDefaultExtensions :: String -> Set Extension +getDefaultExtensions "markdown_strict" = strictExtensions +getDefaultExtensions _ = pandocExtensions + +-- | Retrieve reader based on formatSpec (format+extensions). +getReader :: String -> Either String (ReaderOptions -> String -> Pandoc) +getReader s = + case parseFormatSpec s of + Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e] + Right (readerName, setExts) -> + case lookup readerName readers of + Nothing -> Left $ "Unknown reader: " ++ readerName + Just r -> Right $ \o -> + r o{ readerExtensions = setExts $ + getDefaultExtensions readerName } + +-- | Retrieve writer based on formatSpec (format+extensions). +getWriter :: String -> Either String Writer +getWriter s = + case parseFormatSpec s of + Left e -> Left $ intercalate "\n" $ [m | Message m <- errorMessages e] + Right (writerName, setExts) -> + case lookup writerName writers of + Nothing -> Left $ "Unknown writer: " ++ writerName + Just (PureStringWriter r) -> Right $ PureStringWriter $ + \o -> r o{ writerExtensions = setExts $ + getDefaultExtensions writerName } + Just (IOStringWriter r) -> Right $ IOStringWriter $ + \o -> r o{ writerExtensions = setExts $ + getDefaultExtensions writerName } + Just (IOByteStringWriter r) -> Right $ IOByteStringWriter $ + \o -> r o{ writerExtensions = setExts $ + getDefaultExtensions writerName } {-# DEPRECATED jsonFilter "Use toJsonFilter instead" #-} -- | Converts a transformation on the Pandoc AST into a function diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index cece13fba..13569a4d9 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -38,7 +38,7 @@ import qualified Text.CSL as CSL ( Cite(..) ) import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Shared (stringify) -import Text.ParserCombinators.Parsec +import Text.Parsec import Control.Monad -- | Process a 'Pandoc' document by adding citations formatted @@ -165,7 +165,7 @@ locatorWords inp = breakup (x : xs) = x : breakup xs splitup = groupBy (\x y -> x /= '\160' && y /= '\160') -pLocatorWords :: GenParser Inline st (String, [Inline]) +pLocatorWords :: Parsec [Inline] st (String, [Inline]) pLocatorWords = do l <- pLocator s <- getInput -- rest is suffix @@ -173,16 +173,16 @@ pLocatorWords = do then return (init l, Str "," : s) else return (l, s) -pMatch :: (Inline -> Bool) -> GenParser Inline st Inline +pMatch :: (Inline -> Bool) -> Parsec [Inline] st Inline pMatch condition = try $ do t <- anyToken guard $ condition t return t -pSpace :: GenParser Inline st Inline +pSpace :: Parsec [Inline] st Inline pSpace = pMatch (\t -> t == Space || t == Str "\160") -pLocator :: GenParser Inline st String +pLocator :: Parsec [Inline] st String pLocator = try $ do optional $ pMatch (== Str ",") optional pSpace @@ -190,7 +190,7 @@ pLocator = try $ do gs <- many1 pWordWithDigits return $ stringify f ++ (' ' : unwords gs) -pWordWithDigits :: GenParser Inline st String +pWordWithDigits :: Parsec [Inline] st String pWordWithDigits = try $ do pSpace r <- many1 (notFollowedBy pSpace >> anyToken) diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 080acebee..95df88099 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -47,6 +47,7 @@ module Text.Pandoc.Highlighting ( languages , Style ) where import Text.Pandoc.Definition +import Text.Pandoc.Shared (safeRead) import Text.Highlighting.Kate import Data.List (find) import Data.Maybe (fromMaybe) @@ -60,9 +61,9 @@ highlight :: (FormatOptions -> [SourceLine] -> a) -- ^ Formatter -> String -- ^ Raw contents of the CodeBlock -> Maybe a -- ^ Maybe the formatted result highlight formatter (_, classes, keyvals) rawCode = - let firstNum = case reads (fromMaybe "1" $ lookup "startFrom" keyvals) of - ((n,_):_) -> n - [] -> 1 + let firstNum = case safeRead (fromMaybe "1" $ lookup "startFrom" keyvals) of + Just n -> n + Nothing -> 1 fmtOpts = defaultFormatOpts{ startNumber = firstNum, numberLines = any (`elem` diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index f9749cece..9cde57e4d 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.MIME Copyright : Copyright (C) 2011 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs new file mode 100644 index 000000000..a9c8bf710 --- /dev/null +++ b/src/Text/Pandoc/Options.hs @@ -0,0 +1,277 @@ +{- +Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Options + Copyright : Copyright (C) 2012 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Data structures and functions for representing parser and writer +options. +-} +module Text.Pandoc.Options ( Extension(..) + , pandocExtensions + , strictExtensions + , ReaderOptions(..) + , HTMLMathMethod (..) + , CiteMethod (..) + , ObfuscationMethod (..) + , HTMLSlideVariant (..) + , WriterOptions (..) + , def + , isEnabled + ) where +import Data.Set (Set) +import qualified Data.Set as Set +import Data.Default +import Text.Pandoc.Highlighting (Style, pygments) + +-- | Individually selectable syntax extensions. +data Extension = + Ext_footnotes -- ^ Pandoc/PHP/MMD style footnotes + | Ext_inline_notes -- ^ Pandoc-style inline notes + | Ext_pandoc_title_block -- ^ Pandoc title block + | Ext_mmd_title_block -- ^ Multimarkdown metadata block + | Ext_table_captions -- ^ Pandoc-style table captions + -- | Ext_image_captions + | Ext_simple_tables -- ^ Pandoc-style simple tables + | Ext_multiline_tables -- ^ Pandoc-style multiline tables + | Ext_grid_tables -- ^ Grid tables (pandoc, reST) + | Ext_pipe_tables -- ^ Pipe tables (as in PHP markdown extra) + | Ext_citations -- ^ Pandoc/citeproc citations + | Ext_raw_tex -- ^ Allow raw TeX (other than math) + | Ext_raw_html -- ^ Allow raw HTML + | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$ + | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\] + | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\] + | Ext_latex_macros -- ^ Parse LaTeX macro definitions (for math only) + | Ext_fenced_code_blocks -- ^ Parse fenced code blocks + | Ext_fenced_code_attributes -- ^ Allow attributes on fenced code blocks + | Ext_backtick_code_blocks -- ^ Github style ``` code blocks + | Ext_inline_code_attributes -- ^ Allow attributes on inline code + | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks + | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown + -- iff container has attribute 'markdown' + | Ext_escaped_line_breaks -- ^ Treat a backslash at EOL as linebreak + | Ext_autolink_code_spans -- ^ Put autolink text inside code spans + | Ext_fancy_lists -- ^ Enable fancy list numbers and delimiters + | Ext_startnum -- ^ Make start number of ordered list significant + | Ext_definition_lists -- ^ Definition lists as in pandoc, mmd, php + | Ext_example_lists -- ^ Markdown-style numbered examples + | Ext_all_symbols_escapable -- ^ Make all non-alphanumerics escapable + | Ext_intraword_underscores -- ^ Treat underscore inside word as literal + | Ext_blank_before_blockquote -- ^ Require blank line before a blockquote + | Ext_blank_before_header -- ^ Require blank line before a header + -- | Ext_significant_bullets + | Ext_strikeout -- ^ Strikeout using ~~this~~ syntax + | Ext_superscript -- ^ Superscript using ^this^ syntax + | Ext_subscript -- ^ Subscript using ~this~ syntax + | Ext_hard_line_breaks -- ^ All newlines become hard line breaks + | Ext_literate_haskell -- ^ Enable literate Haskell conventions + | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions + deriving (Show, Read, Enum, Eq, Ord, Bounded) + +pandocExtensions :: Set Extension +pandocExtensions = Set.fromList + [ Ext_footnotes + , Ext_inline_notes + , Ext_pandoc_title_block + , Ext_table_captions + -- , Ext_image_captions + , Ext_simple_tables + , Ext_multiline_tables + , Ext_grid_tables + , Ext_pipe_tables + , Ext_citations + , Ext_raw_tex + , Ext_raw_html + , Ext_tex_math_dollars + , Ext_latex_macros + , Ext_fenced_code_blocks + , Ext_fenced_code_attributes + , Ext_backtick_code_blocks + , Ext_inline_code_attributes + , Ext_markdown_in_html_blocks + , Ext_escaped_line_breaks + , Ext_autolink_code_spans + , Ext_fancy_lists + , Ext_startnum + , Ext_definition_lists + , Ext_example_lists + , Ext_all_symbols_escapable + , Ext_intraword_underscores + , Ext_blank_before_blockquote + , Ext_blank_before_header + -- , Ext_significant_bullets + , Ext_strikeout + , Ext_superscript + , Ext_subscript + ] + +strictExtensions :: Set Extension +strictExtensions = Set.fromList + [ Ext_raw_html ] + +data ReaderOptions = ReaderOptions{ + readerExtensions :: Set Extension -- ^ Syntax extensions + , readerSmart :: Bool -- ^ Smart punctuation + , readerStrict :: Bool -- ^ FOR TRANSITION ONLY + , readerStandalone :: Bool -- ^ Standalone document with header + , readerParseRaw :: Bool -- ^ Parse raw HTML, LaTeX + , readerColumns :: Int -- ^ Number of columns in terminal + , readerTabStop :: Int -- ^ Tab stop + , readerOldDashes :: Bool -- ^ Use pandoc <= 1.8.2.1 behavior + -- in parsing dashes; -- is em-dash; + -- - before numerial is en-dash + , readerCitations :: [String] -- ^ List of available citations + , readerApplyMacros :: Bool -- ^ Apply macros to TeX math + , readerIndentedCodeClasses :: [String] -- ^ Default classes for + -- indented code blocks +} deriving (Show, Read) + +instance Default ReaderOptions + where def = ReaderOptions{ + readerExtensions = pandocExtensions + , readerSmart = False + , readerStrict = False + , readerStandalone = False + , readerParseRaw = False + , readerColumns = 80 + , readerTabStop = 4 + , readerOldDashes = False + , readerCitations = [] + , readerApplyMacros = True + , readerIndentedCodeClasses = [] + } + +-- +-- Writer options +-- + +data HTMLMathMethod = PlainMath + | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js + | JsMath (Maybe String) -- url of jsMath load script + | GladTeX + | WebTeX String -- url of TeX->image script. + | MathML (Maybe String) -- url of MathMLinHTML.js + | MathJax String -- url of MathJax.js + deriving (Show, Read, Eq) + +data CiteMethod = Citeproc -- use citeproc to render them + | Natbib -- output natbib cite commands + | Biblatex -- output biblatex cite commands + deriving (Show, Read, Eq) + +-- | Methods for obfuscating email addresses in HTML. +data ObfuscationMethod = NoObfuscation + | ReferenceObfuscation + | JavascriptObfuscation + deriving (Show, Read, Eq) + +-- | Varieties of HTML slide shows. +data HTMLSlideVariant = S5Slides + | SlidySlides + | SlideousSlides + | DZSlides + | NoSlides + deriving (Show, Read, Eq) + +-- | Options for writers +data WriterOptions = WriterOptions + { writerStandalone :: Bool -- ^ Include header and footer + , writerTemplate :: String -- ^ Template to use in standalone mode + , writerVariables :: [(String, String)] -- ^ Variables to set in template + , writerEPUBMetadata :: String -- ^ Metadata to include in EPUB + , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs + , writerTableOfContents :: Bool -- ^ Include table of contents + , writerSlideVariant :: HTMLSlideVariant -- ^ Are we writing S5, Slidy or Slideous? + , writerIncremental :: Bool -- ^ True if lists should be incremental + , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML + , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) + , writerNumberSections :: Bool -- ^ Number sections in LaTeX + , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML + , writerExtensions :: Set Extension -- ^ Markdown extensions that can be used + , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst + , writerWrapText :: Bool -- ^ Wrap text to line length + , writerColumns :: Int -- ^ Characters in a line (for text wrapping) + , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails + , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML + , writerSourceDirectory :: FilePath -- ^ Directory path of 1st source file + , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory + , writerCiteMethod :: CiteMethod -- ^ How to print cites + , writerBiblioFiles :: [FilePath] -- ^ Biblio files to use for citations + , writerHtml5 :: Bool -- ^ Produce HTML5 + , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show + , writerSlideLevel :: Maybe Int -- ^ Force header level of slides + , writerChapters :: Bool -- ^ Use "chapter" for top-level sects + , writerListings :: Bool -- ^ Use listings package for code + , writerHighlight :: Bool -- ^ Highlight source code + , writerHighlightStyle :: Style -- ^ Style to use for highlighting + , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown + , writerTeXLigatures :: Bool -- ^ Use tex ligatures quotes, dashes in latex + , writerEpubStylesheet :: Maybe String -- ^ EPUB stylesheet specified at command line + , writerEpubFonts :: [FilePath] -- ^ Paths to fonts to embed + , writerReferenceODT :: Maybe FilePath -- ^ Path to reference ODT if specified + , writerReferenceDocx :: Maybe FilePath -- ^ Ptah to reference DOCX if specified + } deriving Show + +instance Default WriterOptions where + def = WriterOptions { writerStandalone = False + , writerTemplate = "" + , writerVariables = [] + , writerEPUBMetadata = "" + , writerTabStop = 4 + , writerTableOfContents = False + , writerSlideVariant = NoSlides + , writerIncremental = False + , writerHTMLMathMethod = PlainMath + , writerIgnoreNotes = False + , writerNumberSections = False + , writerSectionDivs = False + , writerExtensions = pandocExtensions + , writerReferenceLinks = False + , writerWrapText = True + , writerColumns = 72 + , writerEmailObfuscation = JavascriptObfuscation + , writerIdentifierPrefix = "" + , writerSourceDirectory = "." + , writerUserDataDir = Nothing + , writerCiteMethod = Citeproc + , writerBiblioFiles = [] + , writerHtml5 = False + , writerBeamer = False + , writerSlideLevel = Nothing + , writerChapters = False + , writerListings = False + , writerHighlight = False + , writerHighlightStyle = pygments + , writerSetextHeaders = True + , writerTeXLigatures = True + , writerEpubStylesheet = Nothing + , writerEpubFonts = [] + , writerReferenceODT = Nothing + , writerReferenceDocx = Nothing + } + +-- | Returns True if the given extension is enabled. +isEnabled :: Extension -> WriterOptions -> Bool +isEnabled ext opts = ext `Set.member` (writerExtensions opts) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index cac2b71ca..50691f409 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -19,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Parsing Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -48,8 +49,6 @@ module Text.Pandoc.Parsing ( (>>~), withHorizDisplacement, withRaw, nullBlock, - failIfStrict, - failUnlessLHS, escaped, characterReference, updateLastStrPos, @@ -57,37 +56,127 @@ module Text.Pandoc.Parsing ( (>>~), orderedListMarker, charRef, tableWith, + widthsFromIndices, gridTableWith, readWith, testStringWith, + getOption, + guardEnabled, + guardDisabled, ParserState (..), defaultParserState, HeaderType (..), ParserContext (..), QuoteContext (..), NoteTable, + NoteTable', KeyTable, - Key, + Key (..), toKey, - fromKey, - lookupKeySrc, smartPunctuation, + withQuoteContext, + singleQuoteStart, + singleQuoteEnd, + doubleQuoteStart, + doubleQuoteEnd, + ellipses, + apostrophe, + dash, macro, - applyMacros' ) + applyMacros', + Parser, + F(..), + runF, + askF, + asksF, + -- * Re-exports from Text.Pandoc.Parsec + runParser, + parse, + anyToken, + getInput, + setInput, + unexpected, + char, + letter, + digit, + alphaNum, + skipMany, + skipMany1, + spaces, + space, + anyChar, + satisfy, + newline, + string, + count, + eof, + noneOf, + oneOf, + lookAhead, + notFollowedBy, + many, + many1, + manyTill, + (<|>), + (<?>), + choice, + try, + sepBy, + sepBy1, + sepEndBy, + sepEndBy1, + endBy, + endBy1, + option, + optional, + optionMaybe, + getState, + setState, + updateState, + getPosition, + setPosition, + sourceColumn, + sourceLine, + newPos, + token + ) where import Text.Pandoc.Definition -import Text.Pandoc.Generic +import Text.Pandoc.Options +import Text.Pandoc.Builder (Blocks) import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) -import Text.ParserCombinators.Parsec +import Text.Parsec +import Text.Parsec.Pos (newPos) import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation ) import Data.List ( intercalate, transpose ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) -import Control.Monad ( join, liftM, guard ) import Text.Pandoc.Shared import qualified Data.Map as M import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions) import Text.HTML.TagSoup.Entity ( lookupEntity ) +import Data.Default +import qualified Data.Set as Set +import Control.Monad.Reader +import Data.Monoid + +type Parser t s = Parsec t s + +newtype F a = F { unF :: Reader ParserState a } deriving (Monad, Functor) + +runF :: F a -> ParserState -> a +runF = runReader . unF + +askF :: F ParserState +askF = F ask + +asksF :: (ParserState -> a) -> F a +asksF f = F $ asks f + +instance Monoid a => Monoid (F a) where + mempty = return mempty + mappend = liftM2 mappend + mconcat = liftM mconcat . sequence -- | Like >>, but returns the operation on the left. -- (Suggested by Tillmann Rendel on Haskell-cafe list.) @@ -95,62 +184,69 @@ import Text.HTML.TagSoup.Entity ( lookupEntity ) a >>~ b = a >>= \x -> b >> return x -- | Parse any line of text -anyLine :: GenParser Char st [Char] +anyLine :: Parser [Char] st [Char] anyLine = manyTill anyChar newline -- | Like @manyTill@, but reads at least one item. -many1Till :: GenParser tok st a - -> GenParser tok st end - -> GenParser tok st [a] +many1Till :: Parser [tok] st a + -> Parser [tok] st end + -> Parser [tok] st [a] many1Till p end = do first <- p rest <- manyTill p end return (first:rest) --- | A more general form of @notFollowedBy@. This one allows any +-- | A more general form of @notFollowedBy@. This one allows any -- type of parser to be specified, and succeeds only if that parser fails. -- It does not consume any input. -notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () +notFollowedBy' :: Show b => Parser [a] st b -> Parser [a] st () notFollowedBy' p = try $ join $ do a <- try p return (unexpected (show a)) <|> return (return ()) -- (This version due to Andrew Pimlott on the Haskell mailing list.) --- | Parses one of a list of strings (tried in order). -oneOfStrings :: [String] -> GenParser Char st String -oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings +-- | Parses one of a list of strings (tried in order). +oneOfStrings :: [String] -> Parser [Char] st String +oneOfStrings [] = fail "no strings" +oneOfStrings strs = do + c <- anyChar + let strs' = [xs | (x:xs) <- strs, x == c] + case strs' of + [] -> fail "not found" + z | "" `elem` z -> return [c] + | otherwise -> (c:) `fmap` oneOfStrings strs' -- | Parses a space or tab. -spaceChar :: CharParser st Char +spaceChar :: Parser [Char] st Char spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Parses a nonspace, nonnewline character. -nonspaceChar :: CharParser st Char +nonspaceChar :: Parser [Char] st Char nonspaceChar = satisfy $ \x -> x /= '\t' && x /= '\n' && x /= ' ' && x /= '\r' -- | Skips zero or more spaces or tabs. -skipSpaces :: GenParser Char st () +skipSpaces :: Parser [Char] st () skipSpaces = skipMany spaceChar -- | Skips zero or more spaces or tabs, then reads a newline. -blankline :: GenParser Char st Char +blankline :: Parser [Char] st Char blankline = try $ skipSpaces >> newline -- | Parses one or more blank lines and returns a string of newlines. -blanklines :: GenParser Char st [Char] +blanklines :: Parser [Char] st [Char] blanklines = many1 blankline -- | Parses material enclosed between start and end parsers. -enclosed :: GenParser Char st t -- ^ start parser - -> GenParser Char st end -- ^ end parser - -> GenParser Char st a -- ^ content parser (to be used repeatedly) - -> GenParser Char st [a] -enclosed start end parser = try $ +enclosed :: Parser [Char] st t -- ^ start parser + -> Parser [Char] st end -- ^ end parser + -> Parser [Char] st a -- ^ content parser (to be used repeatedly) + -> Parser [Char] st [a] +enclosed start end parser = try $ start >> notFollowedBy space >> many1Till parser end -- | Parse string, case insensitive. -stringAnyCase :: [Char] -> CharParser st String +stringAnyCase :: [Char] -> Parser [Char] st String stringAnyCase [] = string "" stringAnyCase (x:xs) = do firstChar <- char (toUpper x) <|> char (toLower x) @@ -158,7 +254,7 @@ stringAnyCase (x:xs) = do return (firstChar:rest) -- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a +parseFromString :: Parser [tok] st a -> [tok] -> Parser [tok] st a parseFromString parser str = do oldPos <- getPosition oldInput <- getInput @@ -169,8 +265,8 @@ parseFromString parser str = do return result -- | Parse raw line block up to and including blank lines. -lineClump :: GenParser Char st String -lineClump = blanklines +lineClump :: Parser [Char] st String +lineClump = blanklines <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) -- | Parse a string of characters between an open character @@ -178,8 +274,8 @@ lineClump = blanklines -- pairs of open and close, which must be different. For example, -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))" -- and return "hello (there)". -charsInBalanced :: Char -> Char -> GenParser Char st Char - -> GenParser Char st String +charsInBalanced :: Char -> Char -> Parser [Char] st Char + -> Parser [Char] st String charsInBalanced open close parser = try $ do char open let isDelim c = c == open || c == close @@ -204,13 +300,13 @@ uppercaseRomanDigits = map toUpper lowercaseRomanDigits -- | Parses a roman numeral (uppercase or lowercase), returns number. romanNumeral :: Bool -- ^ Uppercase if true - -> GenParser Char st Int + -> Parser [Char] st Int romanNumeral upperCase = do - let romanDigits = if upperCase - then uppercaseRomanDigits + let romanDigits = if upperCase + then uppercaseRomanDigits else lowercaseRomanDigits lookAhead $ oneOf romanDigits - let [one, five, ten, fifty, hundred, fivehundred, thousand] = + let [one, five, ten, fifty, hundred, fivehundred, thousand] = map char romanDigits thousands <- many thousand >>= (return . (1000 *) . length) ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 @@ -234,14 +330,14 @@ romanNumeral upperCase = do -- Parsers for email addresses and URIs -emailChar :: GenParser Char st Char +emailChar :: Parser [Char] st Char emailChar = alphaNum <|> satisfy (\c -> c == '-' || c == '+' || c == '_' || c == '.') -domainChar :: GenParser Char st Char +domainChar :: Parser [Char] st Char domainChar = alphaNum <|> char '-' -domain :: GenParser Char st [Char] +domain :: Parser [Char] st [Char] domain = do first <- many1 domainChar dom <- many1 $ try (char '.' >> many1 domainChar ) @@ -249,7 +345,7 @@ domain = do -- | Parses an email address; returns original and corresponding -- escaped mailto: URI. -emailAddress :: GenParser Char st (String, String) +emailAddress :: Parser [Char] st (String, String) emailAddress = try $ do firstLetter <- alphaNum restAddr <- many emailChar @@ -260,7 +356,7 @@ emailAddress = try $ do return (full, escapeURI $ "mailto:" ++ full) -- | Parses a URI. Returns pair of original and URI-escaped version. -uri :: GenParser Char st (String, String) +uri :: Parser [Char] st (String, String) uri = try $ do let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:", "news:", "telnet:" ] @@ -294,8 +390,8 @@ uri = try $ do -- displacement (the difference between the source column at the end -- and the source column at the beginning). Vertical displacement -- (source row) is ignored. -withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply - -> GenParser Char st (a, Int) -- ^ (result, displacement) +withHorizDisplacement :: Parser [Char] st a -- ^ Parser to apply + -> Parser [Char] st (a, Int) -- ^ (result, displacement) withHorizDisplacement parser = do pos1 <- getPosition result <- parser @@ -304,7 +400,7 @@ withHorizDisplacement parser = do -- | Applies a parser and returns the raw string that was parsed, -- along with the value produced by the parser. -withRaw :: GenParser Char st a -> GenParser Char st (a, [Char]) +withRaw :: Parser [Char] st a -> Parser [Char] st (a, [Char]) withRaw parser = do pos1 <- getPosition inp <- getInput @@ -321,26 +417,16 @@ withRaw parser = do -- | Parses a character and returns 'Null' (so that the parser can move on -- if it gets stuck). -nullBlock :: GenParser Char st Block +nullBlock :: Parser [Char] st Block nullBlock = anyChar >> return Null --- | Fail if reader is in strict markdown syntax mode. -failIfStrict :: GenParser a ParserState () -failIfStrict = do - state <- getState - if stateStrict state then fail "strict mode" else return () - --- | Fail unless we're in literate haskell mode. -failUnlessLHS :: GenParser tok ParserState () -failUnlessLHS = getState >>= guard . stateLiterateHaskell - -- | Parses backslash, then applies character parser. -escaped :: GenParser Char st Char -- ^ Parser for character to escape - -> GenParser Char st Char +escaped :: Parser [Char] st Char -- ^ Parser for character to escape + -> Parser [Char] st Char escaped parser = try $ char '\\' >> parser -- | Parse character entity. -characterReference :: GenParser Char st Char +characterReference :: Parser [Char] st Char characterReference = try $ do char '&' ent <- many1Till nonspaceChar (char ';') @@ -349,19 +435,19 @@ characterReference = try $ do Nothing -> fail "entity not found" -- | Parses an uppercase roman numeral and returns (UpperRoman, number). -upperRoman :: GenParser Char st (ListNumberStyle, Int) +upperRoman :: Parser [Char] st (ListNumberStyle, Int) upperRoman = do num <- romanNumeral True return (UpperRoman, num) -- | Parses a lowercase roman numeral and returns (LowerRoman, number). -lowerRoman :: GenParser Char st (ListNumberStyle, Int) +lowerRoman :: Parser [Char] st (ListNumberStyle, Int) lowerRoman = do num <- romanNumeral False return (LowerRoman, num) -- | Parses a decimal numeral and returns (Decimal, number). -decimal :: GenParser Char st (ListNumberStyle, Int) +decimal :: Parser [Char] st (ListNumberStyle, Int) decimal = do num <- many1 digit return (Decimal, read num) @@ -370,7 +456,7 @@ decimal = do -- returns (DefaultStyle, [next example number]). The next -- example number is incremented in parser state, and the label -- (if present) is added to the label table. -exampleNum :: GenParser Char ParserState (ListNumberStyle, Int) +exampleNum :: Parser [Char] ParserState (ListNumberStyle, Int) exampleNum = do char '@' lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-')) @@ -384,38 +470,38 @@ exampleNum = do return (Example, num) -- | Parses a '#' returns (DefaultStyle, 1). -defaultNum :: GenParser Char st (ListNumberStyle, Int) +defaultNum :: Parser [Char] st (ListNumberStyle, Int) defaultNum = do char '#' return (DefaultStyle, 1) -- | Parses a lowercase letter and returns (LowerAlpha, number). -lowerAlpha :: GenParser Char st (ListNumberStyle, Int) +lowerAlpha :: Parser [Char] st (ListNumberStyle, Int) lowerAlpha = do ch <- oneOf ['a'..'z'] return (LowerAlpha, ord ch - ord 'a' + 1) -- | Parses an uppercase letter and returns (UpperAlpha, number). -upperAlpha :: GenParser Char st (ListNumberStyle, Int) +upperAlpha :: Parser [Char] st (ListNumberStyle, Int) upperAlpha = do ch <- oneOf ['A'..'Z'] return (UpperAlpha, ord ch - ord 'A' + 1) -- | Parses a roman numeral i or I -romanOne :: GenParser Char st (ListNumberStyle, Int) +romanOne :: Parser [Char] st (ListNumberStyle, Int) romanOne = (char 'i' >> return (LowerRoman, 1)) <|> (char 'I' >> return (UpperRoman, 1)) -- | Parses an ordered list marker and returns list attributes. -anyOrderedListMarker :: GenParser Char ParserState ListAttributes -anyOrderedListMarker = choice $ +anyOrderedListMarker :: Parser [Char] ParserState ListAttributes +anyOrderedListMarker = choice $ [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], numParser <- [decimal, exampleNum, defaultNum, romanOne, lowerAlpha, lowerRoman, upperAlpha, upperRoman]] -- | Parses a list number (num) followed by a period, returns list attributes. -inPeriod :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes +inPeriod :: Parser [Char] st (ListNumberStyle, Int) + -> Parser [Char] st ListAttributes inPeriod num = try $ do (style, start) <- num char '.' @@ -423,18 +509,18 @@ inPeriod num = try $ do then DefaultDelim else Period return (start, style, delim) - + -- | Parses a list number (num) followed by a paren, returns list attributes. -inOneParen :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes +inOneParen :: Parser [Char] st (ListNumberStyle, Int) + -> Parser [Char] st ListAttributes inOneParen num = try $ do (style, start) <- num char ')' return (start, style, OneParen) -- | Parses a list number (num) enclosed in parens, returns list attributes. -inTwoParens :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes +inTwoParens :: Parser [Char] st (ListNumberStyle, Int) + -> Parser [Char] st ListAttributes inTwoParens num = try $ do char '(' (style, start) <- num @@ -443,9 +529,9 @@ inTwoParens num = try $ do -- | Parses an ordered list marker with a given style and delimiter, -- returns number. -orderedListMarker :: ListNumberStyle - -> ListNumberDelim - -> GenParser Char ParserState Int +orderedListMarker :: ListNumberStyle + -> ListNumberDelim + -> Parser [Char] ParserState Int orderedListMarker style delim = do let num = defaultNum <|> -- # can continue any kind of list case style of @@ -465,38 +551,34 @@ orderedListMarker style delim = do return start -- | Parses a character reference and returns a Str element. -charRef :: GenParser Char st Inline +charRef :: Parser [Char] st Inline charRef = do c <- characterReference return $ Str [c] -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. -tableWith :: GenParser Char ParserState ([[Block]], [Alignment], [Int]) - -> ([Int] -> GenParser Char ParserState [[Block]]) - -> GenParser Char ParserState sep - -> GenParser Char ParserState end - -> GenParser Char ParserState [Inline] - -> GenParser Char ParserState Block -tableWith headerParser rowParser lineParser footerParser captionParser = try $ do - caption' <- option [] captionParser +tableWith :: Parser [Char] ParserState ([[Block]], [Alignment], [Int]) + -> ([Int] -> Parser [Char] ParserState [[Block]]) + -> Parser [Char] ParserState sep + -> Parser [Char] ParserState end + -> Parser [Char] ParserState Block +tableWith headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser - lines' <- rowParser indices `sepEndBy` lineParser + lines' <- rowParser indices `sepEndBy1` lineParser footerParser - caption <- if null caption' - then option [] captionParser - else return caption' - state <- getState - let numColumns = stateColumns state - let widths = widthsFromIndices numColumns indices - return $ Table caption aligns widths heads lines' + numColumns <- getOption readerColumns + let widths = if (indices == []) + then replicate (length aligns) 0.0 + else widthsFromIndices numColumns indices + return $ Table [] aligns widths heads lines' -- Calculate relative widths of table columns, based on indices widthsFromIndices :: Int -- Number of columns on terminal -> [Int] -- Indices -> [Double] -- Fractional relative sizes of columns -widthsFromIndices _ [] = [] -widthsFromIndices numColumns' indices = +widthsFromIndices _ [] = [] +widthsFromIndices numColumns' indices = let numColumns = max numColumns' (if null indices then 0 else last indices) lengths' = zipWith (-) indices (0:indices) lengths = reverse $ @@ -516,28 +598,30 @@ widthsFromIndices numColumns' indices = fracs = map (\l -> (fromIntegral l) / quotient) lengths in tail fracs +--- + -- Parse a grid table: starts with row of '-' on top, then header -- (which may be grid), then the rows, -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -gridTableWith :: GenParser Char ParserState Block -- ^ Block parser - -> GenParser Char ParserState [Inline] -- ^ Caption parser +gridTableWith :: Parser [Char] ParserState [Block] -- ^ Block list parser -> Bool -- ^ Headerless table - -> GenParser Char ParserState Block -gridTableWith block tableCaption headless = - tableWith (gridTableHeader headless block) (gridTableRow block) (gridTableSep '-') gridTableFooter tableCaption + -> Parser [Char] ParserState Block +gridTableWith blocks headless = + tableWith (gridTableHeader headless blocks) (gridTableRow blocks) + (gridTableSep '-') gridTableFooter gridTableSplitLine :: [Int] -> String -> [String] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitStringByIndices (init indices) $ removeTrailingSpace line -gridPart :: Char -> GenParser Char st (Int, Int) +gridPart :: Char -> Parser [Char] st (Int, Int) gridPart ch = do dashes <- many1 (char ch) char '+' return (length dashes, length dashes + 1) -gridDashedLines :: Char -> GenParser Char st [(Int,Int)] +gridDashedLines :: Char -> Parser [Char] st [(Int,Int)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline removeFinalBar :: String -> String @@ -545,18 +629,18 @@ removeFinalBar = reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse -- | Separator between rows of grid table. -gridTableSep :: Char -> GenParser Char ParserState Char +gridTableSep :: Char -> Parser [Char] ParserState Char gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. gridTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) -gridTableHeader headless block = try $ do + -> Parser [Char] ParserState [Block] + -> Parser [Char] ParserState ([[Block]], [Alignment], [Int]) +gridTableHeader headless blocks = try $ do optional blanklines dashes <- gridDashedLines '-' rawContent <- if headless - then return $ repeat "" + then return $ repeat "" else many1 (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) @@ -571,25 +655,25 @@ gridTableHeader headless block = try $ do then replicate (length dashes) "" else map (intercalate " ") $ transpose $ map (gridTableSplitLine indices) rawContent - heads <- mapM (parseFromString $ many block) $ + heads <- mapM (parseFromString blocks) $ map removeLeadingTrailingSpace rawHeads return (heads, aligns, indices) -gridTableRawLine :: [Int] -> GenParser Char ParserState [String] +gridTableRawLine :: [Int] -> Parser [Char] ParserState [String] gridTableRawLine indices = do char '|' line <- many1Till anyChar newline return (gridTableSplitLine indices line) -- | Parse row of grid table. -gridTableRow :: GenParser Char ParserState Block +gridTableRow :: Parser [Char] ParserState [Block] -> [Int] - -> GenParser Char ParserState [[Block]] -gridTableRow block indices = do + -> Parser [Char] ParserState [[Block]] +gridTableRow blocks indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ transpose colLines - mapM (liftM compactifyCell . parseFromString (many block)) cols + mapM (liftM compactifyCell . parseFromString blocks) cols removeOneLeadingSpace :: [String] -> [String] removeOneLeadingSpace xs = @@ -603,23 +687,23 @@ compactifyCell :: [Block] -> [Block] compactifyCell bs = head $ compactify [bs] -- | Parse footer for a grid table. -gridTableFooter :: GenParser Char ParserState [Char] +gridTableFooter :: Parser [Char] ParserState [Char] gridTableFooter = blanklines --- -- | Parse a string with a given parser and state. -readWith :: GenParser t ParserState a -- ^ parser +readWith :: Parser [t] ParserState a -- ^ parser -> ParserState -- ^ initial state -> [t] -- ^ input -> a -readWith parser state input = +readWith parser state input = case runParser parser state "source" input of Left err' -> error $ "\nError:\n" ++ show err' Right result -> result -- | Parse a string with @parser@ (for testing). -testStringWith :: (Show a) => GenParser Char ParserState a +testStringWith :: (Show a) => Parser [Char] ParserState a -> String -> IO () testStringWith parser str = UTF8.putStrLn $ show $ @@ -627,72 +711,67 @@ testStringWith parser str = UTF8.putStrLn $ show $ -- | Parsing options. data ParserState = ParserState - { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? + { stateOptions :: ReaderOptions, -- ^ User options stateParserContext :: ParserContext, -- ^ Inside list? stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? + stateAllowLinks :: Bool, -- ^ Allow parsing of links stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed - stateKeys :: KeyTable, -- ^ List of reference keys - stateCitations :: [String], -- ^ List of available citations - stateNotes :: NoteTable, -- ^ List of notes - stateTabStop :: Int, -- ^ Tab stop - stateStandalone :: Bool, -- ^ Parse bibliographic info? + stateKeys :: KeyTable, -- ^ List of reference keys (with fallbacks) + stateNotes :: NoteTable, -- ^ List of notes (raw bodies) + stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies) stateTitle :: [Inline], -- ^ Title of document stateAuthors :: [[Inline]], -- ^ Authors of document stateDate :: [Inline], -- ^ Date of document - stateStrict :: Bool, -- ^ Use strict markdown syntax? - stateSmart :: Bool, -- ^ Use smart typography? - stateOldDashes :: Bool, -- ^ Use pandoc <= 1.8.2.1 behavior - -- in parsing dashes; -- is em-dash; - -- before numeral is en-dash - stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell - stateColumns :: Int, -- ^ Number of columns in terminal stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used - stateIndentedCodeClasses :: [String], -- ^ Classes to use for indented code blocks stateNextExample :: Int, -- ^ Number of next example - stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers + stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers stateHasChapters :: Bool, -- ^ True if \chapter encountered - stateApplyMacros :: Bool, -- ^ Apply LaTeX macros? stateMacros :: [Macro], -- ^ List of macros defined so far stateRstDefaultRole :: String -- ^ Current rST default interpreted text role } - deriving Show + +instance Default ParserState where + def = defaultParserState defaultParserState :: ParserState -defaultParserState = - ParserState { stateParseRaw = False, +defaultParserState = + ParserState { stateOptions = def, stateParserContext = NullState, stateQuoteContext = NoQuote, + stateAllowLinks = True, stateMaxNestingLevel = 6, stateLastStrPos = Nothing, stateKeys = M.empty, - stateCitations = [], stateNotes = [], - stateTabStop = 4, - stateStandalone = False, + stateNotes' = [], stateTitle = [], stateAuthors = [], stateDate = [], - stateStrict = False, - stateSmart = False, - stateOldDashes = False, - stateLiterateHaskell = False, - stateColumns = 80, stateHeaderTable = [], - stateIndentedCodeClasses = [], stateNextExample = 1, stateExamples = M.empty, stateHasChapters = False, - stateApplyMacros = True, stateMacros = [], stateRstDefaultRole = "title-reference"} -data HeaderType +getOption :: (ReaderOptions -> a) -> Parser s ParserState a +getOption f = (f . stateOptions) `fmap` getState + +-- | Succeed only if the extension is enabled. +guardEnabled :: Extension -> Parser s ParserState () +guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext + +-- | Succeed only if the extension is disabled. +guardDisabled :: Extension -> Parser s ParserState () +guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext + +data HeaderType = SingleHeader Char -- ^ Single line of characters underneath | DoubleHeader Char -- ^ Lines of characters above and below deriving (Eq, Show) -data ParserContext +data ParserContext = ListItemState -- ^ Used when running parser on list item contents | NullState -- ^ Default state deriving (Eq, Show) @@ -705,51 +784,35 @@ data QuoteContext type NoteTable = [(String, String)] -newtype Key = Key [Inline] deriving (Show, Read, Eq, Ord) +type NoteTable' = [(String, F Blocks)] -- used in markdown reader -toKey :: [Inline] -> Key -toKey = Key . bottomUp lowercase - where lowercase :: Inline -> Inline - lowercase (Str xs) = Str (map toLower xs) - lowercase (Math t xs) = Math t (map toLower xs) - lowercase (Code attr xs) = Code attr (map toLower xs) - lowercase (RawInline f xs) = RawInline f (map toLower xs) - lowercase LineBreak = Space - lowercase x = x +newtype Key = Key String deriving (Show, Read, Eq, Ord) -fromKey :: Key -> [Inline] -fromKey (Key xs) = xs +toKey :: String -> Key +toKey = Key . map toLower . unwords . words type KeyTable = M.Map Key Target --- | Look up key in key table and return target object. -lookupKeySrc :: KeyTable -- ^ Key table - -> Key -- ^ Key - -> Maybe Target -lookupKeySrc table key = case M.lookup key table of - Nothing -> Nothing - Just src -> Just src - -- | Fail unless we're in "smart typography" mode. -failUnlessSmart :: GenParser tok ParserState () -failUnlessSmart = getState >>= guard . stateSmart +failUnlessSmart :: Parser [tok] ParserState () +failUnlessSmart = getOption readerSmart >>= guard -smartPunctuation :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +smartPunctuation :: Parser [Char] ParserState Inline + -> Parser [Char] ParserState Inline smartPunctuation inlineParser = do failUnlessSmart choice [ quoted inlineParser, apostrophe, dash, ellipses ] -apostrophe :: GenParser Char ParserState Inline +apostrophe :: Parser [Char] ParserState Inline apostrophe = (char '\'' <|> char '\8217') >> return (Str "\x2019") -quoted :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +quoted :: Parser [Char] ParserState Inline + -> Parser [Char] ParserState Inline quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser withQuoteContext :: QuoteContext - -> (GenParser Char ParserState Inline) - -> GenParser Char ParserState Inline + -> Parser [Char] ParserState a + -> Parser [Char] ParserState a withQuoteContext context parser = do oldState <- getState let oldQuoteContext = stateQuoteContext oldState @@ -759,39 +822,39 @@ withQuoteContext context parser = do setState newState { stateQuoteContext = oldQuoteContext } return result -singleQuoted :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +singleQuoted :: Parser [Char] ParserState Inline + -> Parser [Char] ParserState Inline singleQuoted inlineParser = try $ do singleQuoteStart withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>= return . Quoted SingleQuote . normalizeSpaces -doubleQuoted :: GenParser Char ParserState Inline - -> GenParser Char ParserState Inline +doubleQuoted :: Parser [Char] ParserState Inline + -> Parser [Char] ParserState Inline doubleQuoted inlineParser = try $ do doubleQuoteStart withQuoteContext InDoubleQuote $ do contents <- manyTill inlineParser doubleQuoteEnd return . Quoted DoubleQuote . normalizeSpaces $ contents -failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () +failIfInQuoteContext :: QuoteContext -> Parser [tok] ParserState () failIfInQuoteContext context = do st <- getState if stateQuoteContext st == context then fail "already inside quotes" else return () -charOrRef :: [Char] -> GenParser Char st Char +charOrRef :: [Char] -> Parser [Char] st Char charOrRef cs = oneOf cs <|> try (do c <- characterReference guard (c `elem` cs) return c) -updateLastStrPos :: GenParser Char ParserState () -updateLastStrPos = getPosition >>= \p -> +updateLastStrPos :: Parser [Char] ParserState () +updateLastStrPos = getPosition >>= \p -> updateState $ \s -> s{ stateLastStrPos = Just p } -singleQuoteStart :: GenParser Char ParserState () +singleQuoteStart :: Parser [Char] ParserState () singleQuoteStart = do failIfInQuoteContext InSingleQuote pos <- getPosition @@ -802,61 +865,61 @@ singleQuoteStart = do notFollowedBy (oneOf ")!],;:-? \t\n") notFollowedBy (char '.') <|> lookAhead (string "..." >> return ()) notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> - satisfy (not . isAlphaNum))) + satisfy (not . isAlphaNum))) -- possess/contraction return () -singleQuoteEnd :: GenParser Char st () +singleQuoteEnd :: Parser [Char] st () singleQuoteEnd = try $ do charOrRef "'\8217\146" notFollowedBy alphaNum -doubleQuoteStart :: GenParser Char ParserState () +doubleQuoteStart :: Parser [Char] ParserState () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote try $ do charOrRef "\"\8220\147" notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n')) -doubleQuoteEnd :: GenParser Char st () +doubleQuoteEnd :: Parser [Char] st () doubleQuoteEnd = do charOrRef "\"\8221\148" return () -ellipses :: GenParser Char st Inline +ellipses :: Parser [Char] st Inline ellipses = do try (charOrRef "\8230\133") <|> try (string "..." >> return '…') return (Str "\8230") -dash :: GenParser Char ParserState Inline +dash :: Parser [Char] ParserState Inline dash = do - oldDashes <- stateOldDashes `fmap` getState + oldDashes <- getOption readerOldDashes if oldDashes then emDashOld <|> enDashOld else Str `fmap` (hyphenDash <|> emDash <|> enDash) -- Two hyphens = en-dash, three = em-dash -hyphenDash :: GenParser Char st String +hyphenDash :: Parser [Char] st String hyphenDash = do try $ string "--" option "\8211" (char '-' >> return "\8212") -emDash :: GenParser Char st String +emDash :: Parser [Char] st String emDash = do try (charOrRef "\8212\151") return "\8212" -enDash :: GenParser Char st String +enDash :: Parser [Char] st String enDash = do try (charOrRef "\8212\151") return "\8211" -enDashOld :: GenParser Char st Inline +enDashOld :: Parser [Char] st Inline enDashOld = do try (charOrRef "\8211\150") <|> try (char '-' >> lookAhead (satisfy isDigit) >> return '–') return (Str "\8211") -emDashOld :: GenParser Char st Inline +emDashOld :: Parser [Char] st Inline emDashOld = do try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-') return (Str "\8212") @@ -866,24 +929,24 @@ emDashOld = do -- -- | Parse a \newcommand or \renewcommand macro definition. -macro :: GenParser Char ParserState Block +macro :: Parser [Char] ParserState Block macro = do - apply <- stateApplyMacros `fmap` getState + apply <- getOption readerApplyMacros inp <- getInput case parseMacroDefinitions inp of - ([], _) -> pzero - (ms, rest) -> do def <- count (length inp - length rest) anyChar + ([], _) -> mzero + (ms, rest) -> do def' <- count (length inp - length rest) anyChar if apply then do updateState $ \st -> st { stateMacros = ms ++ stateMacros st } return Null - else return $ RawBlock "latex" def + else return $ RawBlock "latex" def' -- | Apply current macros to string. -applyMacros' :: String -> GenParser Char ParserState String +applyMacros' :: String -> Parser [Char] ParserState String applyMacros' target = do - apply <- liftM stateApplyMacros getState + apply <- getOption readerApplyMacros if apply then do macros <- liftM stateMacros getState return $ applyMacros macros target diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 0372dbe5d..211fdf20e 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111(-1)307 USA {- | Module : Text.Pandoc.Pretty Copyright : Copyright (C) 2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -202,18 +202,17 @@ chomp d = Doc (fromList dl') outp :: (IsString a, Monoid a) => Int -> String -> DocState a -outp off s | off <= 0 = do +outp off s | off < 0 = do -- offset < 0 means newline characters st' <- get let rawpref = prefix st' when (column st' == 0 && usePrefix st' && not (null rawpref)) $ do let pref = reverse $ dropWhile isSpace $ reverse rawpref modify $ \st -> st{ output = fromString pref : output st , column = column st + realLength pref } - when (off < 0) $ do - modify $ \st -> st { output = fromString s : output st - , column = 0 - , newlines = newlines st + 1 } -outp off s = do + modify $ \st -> st { output = fromString s : output st + , column = 0 + , newlines = newlines st + 1 } +outp off s = do -- offset >= 0 (0 might be combining char) st' <- get let pref = prefix st' when (column st' == 0 && usePrefix st' && not (null pref)) $ do diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 62f7c61a0..685fa1ee4 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,6 +1,6 @@ module Text.Pandoc.Readers.DocBook ( readDocBook ) where import Data.Char (toUpper, isDigit) -import Text.Pandoc.Parsing (ParserState(..)) +import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder import Text.XML.Light @@ -455,13 +455,13 @@ List of all DocBook tags, with [x] indicating implemented, [x] tocfront - An entry in a table of contents for a front matter component [x] toclevel1 - A top-level entry within a table of contents entry for a chapter-like component -[x] toclevel2 - A second-level entry within a table of contents entry for a +[x] toclevel2 - A second-level entry within a table of contents entry for a chapter-like component -[x] toclevel3 - A third-level entry within a table of contents entry for a +[x] toclevel3 - A third-level entry within a table of contents entry for a chapter-like component -[x] toclevel4 - A fourth-level entry within a table of contents entry for a +[x] toclevel4 - A fourth-level entry within a table of contents entry for a chapter-like component -[x] toclevel5 - A fifth-level entry within a table of contents entry for a +[x] toclevel5 - A fifth-level entry within a table of contents entry for a chapter-like component [x] tocpart - An entry in a table of contents for a part of a book [ ] token - A unit of information @@ -503,7 +503,7 @@ data DBState = DBState{ dbSectionLevel :: Int , dbBook :: Bool } deriving Show -readDocBook :: ParserState -> String -> Pandoc +readDocBook :: ReaderOptions -> String -> Pandoc readDocBook _ inp = setTitle (dbDocTitle st') $ setAuthors (dbDocAuthors st') $ setDate (dbDocDate st') @@ -574,7 +574,7 @@ addToStart toadd bs = (Para xs : rest) -> para (toadd <> fromList xs) <> fromList rest _ -> bs --- function that is used by both mediaobject (in parseBlock) +-- function that is used by both mediaobject (in parseBlock) -- and inlinemediaobject (in parseInline) getImage :: Element -> DB Inlines getImage e = do diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 536bddd39..e5c310ffc 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -19,10 +19,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.HTML Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha + Stability : alpha Portability : portable Conversion of HTML to 'Pandoc' document. @@ -36,18 +36,17 @@ module Text.Pandoc.Readers.HTML ( readHtml , isCommentTag ) where -import Text.ParserCombinators.Parsec -import Text.ParserCombinators.Parsec.Pos import Text.HTML.TagSoup import Text.HTML.TagSoup.Match import Text.Pandoc.Definition import Text.Pandoc.Builder (text, toList) import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Parsing import Data.Maybe ( fromMaybe, isJust ) import Data.List ( intercalate ) -import Data.Char ( isDigit, toLower ) -import Control.Monad ( liftM, guard, when ) +import Data.Char ( isDigit ) +import Control.Monad ( liftM, guard, when, mzero ) isSpace :: Char -> Bool isSpace ' ' = True @@ -56,11 +55,11 @@ isSpace '\n' = True isSpace _ = False -- | Convert HTML-formatted string to 'Pandoc' document. -readHtml :: ParserState -- ^ Parser state +readHtml :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) -> Pandoc -readHtml st inp = Pandoc meta blocks - where blocks = readWith parseBody st rest +readHtml opts inp = Pandoc meta blocks + where blocks = readWith parseBody def{ stateOptions = opts } rest tags = canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp hasHeader = any (~== TagOpen "head" []) tags @@ -68,7 +67,7 @@ readHtml st inp = Pandoc meta blocks then parseHeader tags else (Meta [] [] [], tags) -type TagParser = GenParser (Tag String) ParserState +type TagParser = Parser [Tag String] ParserState -- TODO - fix this - not every header has a title tag parseHeader :: [Tag String] -> (Meta, [Tag String]) @@ -96,18 +95,6 @@ block = choice , pRawHtmlBlock ] --- repeated in SelfContained -- consolidate eventually -renderTags' :: [Tag String] -> String -renderTags' = renderTagsOptions - renderOptions{ optMinimize = \x -> - let y = map toLower x - in y == "hr" || y == "br" || - y == "img" || y == "meta" || - y == "link" - , optRawTag = \x -> - let y = map toLower x - in y == "script" || y == "style" } - pList :: TagParser [Block] pList = pBulletList <|> pOrderedList <|> pDefinitionList @@ -126,25 +113,22 @@ pBulletList = try $ do pOrderedList :: TagParser [Block] pOrderedList = try $ do TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) - st <- getState - let (start, style) = if stateStrict st - then (1, DefaultStyle) - else (sta', sty') - where sta = fromMaybe "1" $ - lookup "start" attribs - sta' = if all isDigit sta - then read sta - else 1 - sty = fromMaybe (fromMaybe "" $ - lookup "style" attribs) $ - lookup "class" attribs - sty' = case sty of - "lower-roman" -> LowerRoman - "upper-roman" -> UpperRoman - "lower-alpha" -> LowerAlpha - "upper-alpha" -> UpperAlpha - "decimal" -> Decimal - _ -> DefaultStyle + let (start, style) = (sta', sty') + where sta = fromMaybe "1" $ + lookup "start" attribs + sta' = if all isDigit sta + then read sta + else 1 + sty = fromMaybe (fromMaybe "" $ + lookup "style" attribs) $ + lookup "class" attribs + sty' = case sty of + "lower-roman" -> LowerRoman + "upper-roman" -> UpperRoman + "lower-alpha" -> LowerAlpha + "upper-alpha" -> UpperAlpha + "decimal" -> Decimal + _ -> DefaultStyle let nonItem = pSatisfy (\t -> not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && not (t ~== TagClose "ol")) @@ -196,8 +180,8 @@ pRawTag = do pRawHtmlBlock :: TagParser [Block] pRawHtmlBlock = do raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag - state <- getState - if stateParseRaw state && not (null raw) + parseRaw <- getOption readerParseRaw + if parseRaw && not (null raw) then return [RawBlock "html" raw] else return [] @@ -235,7 +219,7 @@ pSimpleTable = try $ do rows <- pOptInTag "tbody" $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td") skipMany pBlank - TagClose _ <- pSatisfy (~== TagClose "table") + TagClose _ <- pSatisfy (~== TagClose "table") let cols = maximum $ map length rows let aligns = replicate cols AlignLeft let widths = replicate cols 0 @@ -281,10 +265,7 @@ pCodeBlock = try $ do let attribsId = fromMaybe "" $ lookup "id" attr let attribsClasses = words $ fromMaybe "" $ lookup "class" attr let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr - st <- getState - let attribs = if stateStrict st - then ("",[],[]) - else (attribsId, attribsClasses, attribsKV) + let attribs = (attribsId, attribsClasses, attribsKV) return [CodeBlock attribs result] inline :: TagParser [Inline] @@ -310,7 +291,7 @@ pLocation = do pSat :: (Tag String -> Bool) -> TagParser (Tag String) pSat f = do pos <- getPosition - token show (const pos) (\x -> if f x then Just x else Nothing) + token show (const pos) (\x -> if f x then Just x else Nothing) pSatisfy :: (Tag String -> Bool) -> TagParser (Tag String) pSatisfy f = try $ optional pLocation >> pSat f @@ -332,14 +313,13 @@ pStrong :: TagParser [Inline] pStrong = pInlinesInTags "strong" Strong <|> pInlinesInTags "b" Strong pSuperscript :: TagParser [Inline] -pSuperscript = failIfStrict >> pInlinesInTags "sup" Superscript +pSuperscript = pInlinesInTags "sup" Superscript pSubscript :: TagParser [Inline] -pSubscript = failIfStrict >> pInlinesInTags "sub" Subscript +pSubscript = pInlinesInTags "sub" Subscript pStrikeout :: TagParser [Inline] pStrikeout = do - failIfStrict pInlinesInTags "s" Strikeout <|> pInlinesInTags "strike" Strikeout <|> pInlinesInTags "del" Strikeout <|> @@ -381,8 +361,8 @@ pCode = try $ do pRawHtmlInline :: TagParser [Inline] pRawHtmlInline = do result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag - state <- getState - if stateParseRaw state + parseRaw <- getOption readerParseRaw + if parseRaw then return [RawInline "html" $ renderTags' [result]] else return [] @@ -417,7 +397,7 @@ pCloses tagtype = try $ do (TagClose "ul") | tagtype == "li" -> return () (TagClose "ol") | tagtype == "li" -> return () (TagClose "dl") | tagtype == "li" -> return () - _ -> pzero + _ -> mzero pTagText :: TagParser [Inline] pTagText = try $ do @@ -432,11 +412,11 @@ pBlank = try $ do (TagText str) <- pSatisfy isTagText guard $ all isSpace str -pTagContents :: GenParser Char ParserState Inline +pTagContents :: Parser [Char] ParserState Inline pTagContents = pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad -pStr :: GenParser Char ParserState Inline +pStr :: Parser [Char] ParserState Inline pStr = do result <- many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c) && not (isBad c) @@ -455,13 +435,13 @@ isSpecial '\8220' = True isSpecial '\8221' = True isSpecial _ = False -pSymbol :: GenParser Char ParserState Inline +pSymbol :: Parser [Char] ParserState Inline pSymbol = satisfy isSpecial >>= return . Str . (:[]) isBad :: Char -> Bool isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML -pBad :: GenParser Char ParserState Inline +pBad :: Parser [Char] ParserState Inline pBad = do c <- satisfy isBad let c' = case c of @@ -495,7 +475,7 @@ pBad = do _ -> '?' return $ Str [c'] -pSpace :: GenParser Char ParserState Inline +pSpace :: Parser [Char] ParserState Inline pSpace = many1 (satisfy isSpace) >> return Space -- @@ -593,20 +573,19 @@ _ `closes` _ = False --- parsers for use in markdown, textile readers -- | Matches a stretch of HTML in balanced tags. -htmlInBalanced :: (Tag String -> Bool) -> GenParser Char ParserState String +htmlInBalanced :: (Tag String -> Bool) -> Parser [Char] ParserState String htmlInBalanced f = try $ do (TagOpen t _, tag) <- htmlTag f guard $ '/' `notElem` tag -- not a self-closing tag - let nonTagChunk = many1 $ satisfy (/= '<') let stopper = htmlTag (~== TagClose t) let anytag = liftM snd $ htmlTag (const True) contents <- many $ notFollowedBy' stopper >> - (nonTagChunk <|> htmlInBalanced (const True) <|> anytag) + (htmlInBalanced f <|> anytag <|> count 1 anyChar) endtag <- liftM snd stopper return $ tag ++ concat contents ++ endtag -- | Matches a tag meeting a certain condition. -htmlTag :: (Tag String -> Bool) -> GenParser Char ParserState (Tag String, String) +htmlTag :: (Tag String -> Bool) -> Parser [Char] ParserState (Tag String, String) htmlTag f = try $ do lookAhead (char '<') (next : _) <- getInput >>= return . canonicalizeTags . parseTags @@ -617,7 +596,7 @@ htmlTag f = try $ do count (length s + 4) anyChar skipMany (satisfy (/='>')) char '>' - return (next, "<!--" ++ s ++ "-->") + return (next, "<!--" ++ s ++ "-->") _ -> do rendered <- manyTill anyChar (char '>') return (next, rendered ++ ">") diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 3178945e4..4a5a14d6a 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -33,10 +33,10 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX, handleIncludes ) where -import Text.ParserCombinators.Parsec hiding ((<|>), space, many, optional) import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Parsing +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding ((<|>), many, optional, space) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) import Control.Monad @@ -47,12 +47,13 @@ import Data.Monoid import System.FilePath (replaceExtension) import Data.List (intercalate) import qualified Data.Map as M +import qualified Control.Exception as E -- | Parse LaTeX from string and return 'Pandoc' document. -readLaTeX :: ParserState -- ^ Parser state, including options for parser +readLaTeX :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assumes @'\n'@ line endings) -> Pandoc -readLaTeX = readWith parseLaTeX +readLaTeX opts = readWith parseLaTeX def{ stateOptions = opts } parseLaTeX :: LP Pandoc parseLaTeX = do @@ -64,7 +65,7 @@ parseLaTeX = do let date' = stateDate st return $ Pandoc (Meta title' authors' date') $ toList bs -type LP = GenParser Char ParserState +type LP = Parser [Char] ParserState anyControlSeq :: LP String anyControlSeq = do @@ -186,7 +187,7 @@ inline = (mempty <$ comment) <|> (mathInline $ char '$' *> mathChars <* char '$') <|> (superscript <$> (char '^' *> tok)) <|> (subscript <$> (char '_' *> tok)) - <|> (failUnlessLHS *> char '|' *> doLHSverb) + <|> (guardEnabled Ext_literate_haskell *> char '|' *> doLHSverb) <|> (str <$> count 1 tildeEscape) <|> (str <$> string "]") <|> (str <$> string "#") -- TODO print warning? @@ -230,14 +231,14 @@ ignoreInlines name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawInline "latex" . (contseq ++) . snd) <$> - (getState >>= guard . stateParseRaw >> (withRaw optargs)) + (getOption readerParseRaw >>= guard >> (withRaw optargs)) ignoreBlocks :: String -> (String, LP Blocks) ignoreBlocks name = (name, doraw <|> (mempty <$ optargs)) where optargs = skipopts *> skipMany (try $ optional sp *> braced) contseq = '\\':name doraw = (rawBlock "latex" . (contseq ++) . snd) <$> - (getState >>= guard . stateParseRaw >> (withRaw optargs)) + (getOption readerParseRaw >>= guard >> (withRaw optargs)) blockCommands :: M.Map String (LP Blocks) blockCommands = M.fromList $ @@ -321,7 +322,7 @@ inlineCommand :: LP Inlines inlineCommand = try $ do name <- anyControlSeq guard $ not $ isBlockCommand name - parseRaw <- stateParseRaw `fmap` getState + parseRaw <- getOption readerParseRaw star <- option "" (string "*") let name' = name ++ star let rawargs = withRaw (skipopts *> option "" dimenarg @@ -336,7 +337,7 @@ inlineCommand = try $ do Nothing -> raw unlessParseRaw :: LP () -unlessParseRaw = getState >>= guard . not . stateParseRaw +unlessParseRaw = getOption readerParseRaw >>= guard . not isBlockCommand :: String -> Bool isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands @@ -660,7 +661,7 @@ environment = do rawEnv :: String -> LP Blocks rawEnv name = do let addBegin x = "\\begin{" ++ name ++ "}" ++ x - parseRaw <- stateParseRaw `fmap` getState + parseRaw <- getOption readerParseRaw if parseRaw then (rawBlock "latex" . addBegin) <$> (withRaw (env name blocks) >>= applyMacros' . snd) @@ -671,8 +672,9 @@ handleIncludes :: String -> IO String handleIncludes [] = return [] handleIncludes ('\\':xs) = case runParser include defaultParserState "input" ('\\':xs) of - Right (fs, rest) -> do let getfile f = catch (UTF8.readFile f) - (\_ -> return "") + Right (fs, rest) -> do let getfile f = E.catch (UTF8.readFile f) + (\e -> let _ = (e :: E.SomeException) + in return "") yss <- mapM getfile fs (intercalate "\n" yss ++) `fmap` handleIncludes rest @@ -713,10 +715,10 @@ verbatimEnv = do rest <- getInput return (r,rest) -rawLaTeXBlock :: GenParser Char ParserState String +rawLaTeXBlock :: Parser [Char] ParserState String rawLaTeXBlock = snd <$> withRaw (environment <|> blockCommand) -rawLaTeXInline :: GenParser Char ParserState Inline +rawLaTeXInline :: Parser [Char] ParserState Inline rawLaTeXInline = do (res, raw) <- withRaw inlineCommand if res == mempty @@ -735,7 +737,7 @@ environments = M.fromList , ("itemize", bulletList <$> listenv "itemize" (many item)) , ("description", definitionList <$> listenv "description" (many descItem)) , ("enumerate", ordered_list) - , ("code", failUnlessLHS *> + , ("code", guardEnabled Ext_literate_haskell *> (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> verbEnv "code")) , ("verbatim", codeBlock <$> (verbEnv "verbatim")) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 51a727996..2407e137c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,4 +1,6 @@ {-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7 +{-# LANGUAGE FlexibleInstances, TypeSynonymInstances, + GeneralizedNewtypeDeriving #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -20,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Markdown Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -33,26 +35,34 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown ) where import Data.List ( transpose, sortBy, findIndex, intercalate ) import qualified Data.Map as M import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum ) +import Data.Char ( isAlphaNum, toLower ) import Data.Maybe import Text.Pandoc.Definition -import Text.Pandoc.Generic -import Text.Pandoc.Shared -import Text.Pandoc.Parsing +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>)) +import Text.Pandoc.Options +import Text.Pandoc.Shared hiding (compactify) +import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) import Text.Pandoc.XML ( fromEntities ) -import Text.ParserCombinators.Parsec -import Control.Monad (when, liftM, guard, mzero) +import Data.Monoid (mconcat, mempty) +import Control.Applicative ((<$>), (<*), (*>), (<$)) +import Control.Monad import Text.HTML.TagSoup import Text.HTML.TagSoup.Match (tagOpen) +import qualified Data.Set as Set -- | Read markdown from an input string and return a Pandoc document. -readMarkdown :: ParserState -- ^ Parser state, including options for parser - -> String -- ^ String to parse (assuming @'\n'@ line endings) +readMarkdown :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readMarkdown state s = (readWith parseMarkdown) state (s ++ "\n\n") +readMarkdown opts s = + (readWith parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n") + +trimInlinesF :: F Inlines -> F Inlines +trimInlinesF = liftM trimInlines -- -- Constants and data structure definitions @@ -70,7 +80,7 @@ isHruleChar '-' = True isHruleChar '_' = True isHruleChar _ = False -setextHChars :: [Char] +setextHChars :: String setextHChars = "=-" isBlank :: Char -> Bool @@ -83,71 +93,72 @@ isBlank _ = False -- auxiliary functions -- -indentSpaces :: GenParser Char ParserState [Char] +isNull :: F Inlines -> Bool +isNull ils = B.isNull $ runF ils def + +spnl :: Parser [Char] st () +spnl = try $ do + skipSpaces + optional newline + skipSpaces + notFollowedBy (char '\n') + +indentSpaces :: Parser [Char] ParserState String indentSpaces = try $ do - state <- getState - let tabStop = stateTabStop state + tabStop <- getOption readerTabStop count tabStop (char ' ') <|> string "\t" <?> "indentation" -nonindentSpaces :: GenParser Char ParserState [Char] +nonindentSpaces :: Parser [Char] ParserState String nonindentSpaces = do - state <- getState - let tabStop = stateTabStop state + tabStop <- getOption readerTabStop sps <- many (char ' ') - if length sps < tabStop + if length sps < tabStop then return sps else unexpected "indented line" -skipNonindentSpaces :: GenParser Char ParserState () +skipNonindentSpaces :: Parser [Char] ParserState () skipNonindentSpaces = do - state <- getState - atMostSpaces (stateTabStop state - 1) + tabStop <- getOption readerTabStop + atMostSpaces (tabStop - 1) -atMostSpaces :: Int -> GenParser Char ParserState () +atMostSpaces :: Int -> Parser [Char] ParserState () atMostSpaces 0 = notFollowedBy (char ' ') atMostSpaces n = (char ' ' >> atMostSpaces (n-1)) <|> return () -litChar :: GenParser Char ParserState Char +litChar :: Parser [Char] ParserState Char litChar = escapedChar' <|> noneOf "\n" <|> (newline >> notFollowedBy blankline >> return ' ') --- | Fail unless we're at beginning of a line. -failUnlessBeginningOfLine :: GenParser tok st () -failUnlessBeginningOfLine = do - pos <- getPosition - if sourceColumn pos == 1 then return () else fail "not beginning of line" - -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. -inlinesInBalancedBrackets :: GenParser Char ParserState Inline - -> GenParser Char ParserState [Inline] -inlinesInBalancedBrackets parser = try $ do +inlinesInBalancedBrackets :: Parser [Char] ParserState (F Inlines) +inlinesInBalancedBrackets = try $ do char '[' - result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser - guard (res == "[") - bal <- inlinesInBalancedBrackets parser - return $ [Str "["] ++ bal ++ [Str "]"]) - <|> (count 1 parser)) + result <- manyTill ( (do lookAhead $ try $ do x <- inline + guard (runF x def == B.str "[") + bal <- inlinesInBalancedBrackets + return $ (\x -> B.str "[" <> x <> B.str "]") <$> bal) + <|> inline) (char ']') - return $ concat result + return $ mconcat result -- -- document structure -- -titleLine :: GenParser Char ParserState [Inline] +titleLine :: Parser [Char] ParserState (F Inlines) titleLine = try $ do char '%' skipSpaces res <- many $ (notFollowedBy newline >> inline) <|> try (endline >> whitespace) newline - return $ normalizeSpaces res + return $ trimInlinesF $ mconcat res -authorsLine :: GenParser Char ParserState [[Inline]] -authorsLine = try $ do +authorsLine :: Parser [Char] ParserState (F [Inlines]) +authorsLine = try $ do char '%' skipSpaces authors <- sepEndBy (many (notFollowedBy (satisfy $ \c -> @@ -155,67 +166,63 @@ authorsLine = try $ do (char ';' <|> try (newline >> notFollowedBy blankline >> spaceChar)) newline - return $ filter (not . null) $ map normalizeSpaces authors + return $ sequence $ filter (not . isNull) $ map (trimInlinesF . mconcat) authors -dateLine :: GenParser Char ParserState [Inline] +dateLine :: Parser [Char] ParserState (F Inlines) dateLine = try $ do char '%' skipSpaces - date <- manyTill inline newline - return $ normalizeSpaces date - -titleBlock :: GenParser Char ParserState ([Inline], [[Inline]], [Inline]) -titleBlock = try $ do - failIfStrict - title <- option [] titleLine - author <- option [] authorsLine - date <- option [] dateLine + trimInlinesF . mconcat <$> manyTill inline newline + +titleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines) +titleBlock = pandocTitleBlock <|> mmdTitleBlock + +pandocTitleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines) +pandocTitleBlock = try $ do + guardEnabled Ext_pandoc_title_block + title <- option mempty titleLine + author <- option (return []) authorsLine + date <- option mempty dateLine optional blanklines return (title, author, date) -parseMarkdown :: GenParser Char ParserState Pandoc +mmdTitleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines) +mmdTitleBlock = try $ do + guardEnabled Ext_mmd_title_block + kvPairs <- many1 kvPair + blanklines + let title = maybe mempty return $ lookup "title" kvPairs + let author = maybe mempty (\x -> return [x]) $ lookup "author" kvPairs + let date = maybe mempty return $ lookup "date" kvPairs + return (title, author, date) + +kvPair :: Parser [Char] ParserState (String, Inlines) +kvPair = try $ do + key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') + val <- manyTill anyChar + (try $ newline >> lookAhead (blankline <|> nonspaceChar)) + let key' = concat $ words $ map toLower key + let val' = trimInlines $ B.text val + return (key',val') + +parseMarkdown :: Parser [Char] ParserState Pandoc parseMarkdown = do -- markdown allows raw HTML - updateState (\state -> state { stateParseRaw = True }) - startPos <- getPosition - -- go through once just to get list of reference keys and notes - -- docMinusKeys is the raw document with blanks where the keys/notes were... - st <- getState - let firstPassParser = referenceKey - <|> (if stateStrict st then pzero else noteBlock) - <|> liftM snd (withRaw codeBlockDelimited) - <|> lineClump - docMinusKeys <- liftM concat $ manyTill firstPassParser eof - setInput docMinusKeys - setPosition startPos - st' <- getState - let reversedNotes = stateNotes st' - updateState $ \s -> s { stateNotes = reverse reversedNotes } - -- now parse it for real... - (title, author, date) <- option ([],[],[]) titleBlock + updateState $ \state -> state { stateOptions = + let oldOpts = stateOptions state in + oldOpts{ readerParseRaw = True } } + (title, authors, date) <- option (mempty,return [],mempty) titleBlock blocks <- parseBlocks - let doc = Pandoc (Meta title author date) $ filter (/= Null) blocks - -- if there are labeled examples, change references into numbers - examples <- liftM stateExamples getState - let handleExampleRef :: Inline -> Inline - handleExampleRef z@(Str ('@':xs)) = - case M.lookup xs examples of - Just n -> Str (show n) - Nothing -> z - handleExampleRef z = z - if M.null examples - then return doc - else return $ bottomUp handleExampleRef doc - --- --- initial pass for references and notes --- + st <- getState + return $ B.setTitle (runF title st) + $ B.setAuthors (runF authors st) + $ B.setDate (runF date st) + $ B.doc $ runF blocks st -referenceKey :: GenParser Char ParserState [Char] +referenceKey :: Parser [Char] ParserState (F Blocks) referenceKey = try $ do - startPos <- getPosition skipNonindentSpaces - lab <- reference + (_,raw) <- reference char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') let sourceURL = liftM unwords $ many $ try $ do @@ -223,22 +230,20 @@ referenceKey = try $ do skipMany spaceChar optional $ newline >> notFollowedBy blankline skipMany spaceChar - notFollowedBy' reference + notFollowedBy' (() <$ reference) many1 $ escapedChar' <|> satisfy (not . isBlank) let betweenAngles = try $ char '<' >> manyTill (escapedChar' <|> litChar) (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle blanklines - endPos <- getPosition let target = (escapeURI $ removeTrailingSpace src, tit) st <- getState let oldkeys = stateKeys st - updateState $ \s -> s { stateKeys = M.insert (toKey lab) target oldkeys } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + updateState $ \s -> s { stateKeys = M.insert (toKey raw) target oldkeys } + return $ return mempty -referenceTitle :: GenParser Char ParserState String +referenceTitle :: Parser [Char] ParserState String referenceTitle = try $ do skipSpaces >> optional newline >> skipSpaces tit <- (charsInBalanced '(' ')' litChar >>= return . unwords . words) @@ -247,25 +252,38 @@ referenceTitle = try $ do notFollowedBy (noneOf ")\n"))) return $ fromEntities tit -noteMarker :: GenParser Char ParserState [Char] +-- | PHP Markdown Extra style abbreviation key. Currently +-- we just skip them, since Pandoc doesn't have an element for +-- an abbreviation. +abbrevKey :: Parser [Char] ParserState (F Blocks) +abbrevKey = do + guardEnabled Ext_abbreviations + try $ do + char '*' + reference + char ':' + skipMany (satisfy (/= '\n')) + blanklines + return $ return mempty + +noteMarker :: Parser [Char] ParserState String noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') -rawLine :: GenParser Char ParserState [Char] +rawLine :: Parser [Char] ParserState String rawLine = try $ do notFollowedBy blankline notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker optional indentSpaces anyLine -rawLines :: GenParser Char ParserState [Char] +rawLines :: Parser [Char] ParserState String rawLines = do first <- anyLine rest <- many rawLine return $ unlines (first:rest) -noteBlock :: GenParser Char ParserState [Char] +noteBlock :: Parser [Char] ParserState (F Blocks) noteBlock = try $ do - startPos <- getPosition skipNonindentSpaces ref <- noteMarker char ':' @@ -275,87 +293,75 @@ noteBlock = try $ do (try (blankline >> indentSpaces >> notFollowedBy blankline)) optional blanklines - endPos <- getPosition - let newnote = (ref, (intercalate "\n" raw) ++ "\n\n") - st <- getState - let oldnotes = stateNotes st - updateState $ \s -> s { stateNotes = newnote : oldnotes } - -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + parsed <- parseFromString parseBlocks $ unlines raw ++ "\n" + let newnote = (ref, parsed) + updateState $ \s -> s { stateNotes' = newnote : stateNotes' s } + return mempty -- -- parsing blocks -- -parseBlocks :: GenParser Char ParserState [Block] -parseBlocks = manyTill block eof - -block :: GenParser Char ParserState Block -block = do - st <- getState - choice (if stateStrict st - then [ header - , codeBlockIndented - , blockQuote - , hrule - , bulletList - , orderedList - , htmlBlock - , para - , plain - , nullBlock ] - else [ codeBlockDelimited - , macro - , header - , table - , codeBlockIndented - , lhsCodeBlock - , blockQuote - , hrule - , bulletList - , orderedList - , definitionList - , rawTeXBlock - , para - , rawHtmlBlocks - , plain - , nullBlock ]) <?> "block" +parseBlocks :: Parser [Char] ParserState (F Blocks) +parseBlocks = mconcat <$> manyTill block eof + +block :: Parser [Char] ParserState (F Blocks) +block = choice [ codeBlockFenced + , codeBlockBackticks + , guardEnabled Ext_latex_macros *> (mempty <$ macro) + , header + , rawTeXBlock + , htmlBlock + , table + , codeBlockIndented + , lhsCodeBlock + , blockQuote + , hrule + , bulletList + , orderedList + , definitionList + , noteBlock + , referenceKey + , abbrevKey + , para + , plain + ] <?> "block" -- -- header blocks -- -header :: GenParser Char ParserState Block +header :: Parser [Char] ParserState (F Blocks) header = setextHeader <|> atxHeader <?> "header" -atxHeader :: GenParser Char ParserState Block +atxHeader :: Parser [Char] ParserState (F Blocks) atxHeader = try $ do level <- many1 (char '#') >>= return . length notFollowedBy (char '.' <|> char ')') -- this would be a list skipSpaces - text <- manyTill inline atxClosing >>= return . normalizeSpaces - return $ Header level text + text <- trimInlinesF . mconcat <$> manyTill inline atxClosing + return $ B.header level <$> text -atxClosing :: GenParser Char st [Char] +atxClosing :: Parser [Char] st String atxClosing = try $ skipMany (char '#') >> blanklines -setextHeader :: GenParser Char ParserState Block +setextHeader :: Parser [Char] ParserState (F Blocks) setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline - text <- many1Till inline newline + text <- trimInlinesF . mconcat <$> many1Till inline newline underlineChar <- oneOf setextHChars many (char underlineChar) blanklines let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - return $ Header level (normalizeSpaces text) + return $ B.header level <$> text -- -- hrule block -- -hrule :: GenParser Char st Block +hrule :: Parser [Char] st (F Blocks) hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -363,32 +369,26 @@ hrule = try $ do skipMany (spaceChar <|> char start) newline optional blanklines - return HorizontalRule + return $ return B.horizontalRule -- -- code blocks -- -indentedLine :: GenParser Char ParserState [Char] +indentedLine :: Parser [Char] ParserState String indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n") blockDelimiter :: (Char -> Bool) -> Maybe Int - -> GenParser Char st (Int, (String, [String], [(String, String)]), Char) + -> Parser [Char] st Int blockDelimiter f len = try $ do c <- lookAhead (satisfy f) - size <- case len of - Just l -> count l (char c) >> many (char c) >> return l - Nothing -> count 3 (char c) >> many (char c) >>= - return . (+ 3) . length - many spaceChar - attr <- option ([],[],[]) - $ attributes -- ~~~ {.ruby} - <|> (many1 alphaNum >>= \x -> return ([],[x],[])) -- github variant ```ruby - blankline - return (size, attr, c) + case len of + Just l -> count l (char c) >> many (char c) >> return l + Nothing -> count 3 (char c) >> many (char c) >>= + return . (+ 3) . length -attributes :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) +attributes :: Parser [Char] st (String, [String], [(String, String)]) attributes = try $ do char '{' spnl @@ -400,28 +400,28 @@ attributes = try $ do | otherwise = firstNonNull xs return (firstNonNull $ reverse ids, concat classes, concat keyvals) -attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) +attribute :: Parser [Char] st (String, [String], [(String, String)]) attribute = identifierAttr <|> classAttr <|> keyValAttr -identifier :: GenParser Char st [Char] +identifier :: Parser [Char] st String identifier = do first <- letter rest <- many $ alphaNum <|> oneOf "-_:." return (first:rest) -identifierAttr :: GenParser Char st ([Char], [a], [a1]) +identifierAttr :: Parser [Char] st (String, [a], [a1]) identifierAttr = try $ do char '#' result <- identifier return (result,[],[]) -classAttr :: GenParser Char st ([Char], [[Char]], [a]) +classAttr :: Parser [Char] st (String, [String], [a]) classAttr = try $ do char '.' result <- identifier return ("",[result],[]) -keyValAttr :: GenParser Char st ([Char], [a], [([Char], [Char])]) +keyValAttr :: Parser [Char] st (String, [a], [(String, String)]) keyValAttr = try $ do key <- identifier char '=' @@ -430,33 +430,49 @@ keyValAttr = try $ do <|> many nonspaceChar return ("",[],[(key,val)]) -codeBlockDelimited :: GenParser Char st Block -codeBlockDelimited = try $ do - (size, attr, c) <- blockDelimiter (\c -> c == '~' || c == '`') Nothing - contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) +codeBlockFenced :: Parser [Char] ParserState (F Blocks) +codeBlockFenced = try $ do + guardEnabled Ext_fenced_code_blocks + size <- blockDelimiter (=='~') Nothing + skipMany spaceChar + attr <- option ([],[],[]) $ + guardEnabled Ext_fenced_code_attributes >> attributes + blankline + contents <- manyTill anyLine (blockDelimiter (=='~') (Just size)) + blanklines + return $ return $ B.codeBlockWith attr $ intercalate "\n" contents + +codeBlockBackticks :: Parser [Char] ParserState (F Blocks) +codeBlockBackticks = try $ do + guardEnabled Ext_backtick_code_blocks + blockDelimiter (=='`') (Just 3) + skipMany spaceChar + cls <- many1 alphaNum + blankline + contents <- manyTill anyLine $ blockDelimiter (=='`') (Just 3) blanklines - return $ CodeBlock attr $ intercalate "\n" contents + return $ return $ B.codeBlockWith ("",[cls],[]) $ intercalate "\n" contents -codeBlockIndented :: GenParser Char ParserState Block +codeBlockIndented :: Parser [Char] ParserState (F Blocks) codeBlockIndented = do - contents <- many1 (indentedLine <|> + contents <- many1 (indentedLine <|> try (do b <- blanklines l <- indentedLine return $ b ++ l)) optional blanklines - st <- getState - return $ CodeBlock ("", stateIndentedCodeClasses st, []) $ + classes <- getOption readerIndentedCodeClasses + return $ return $ B.codeBlockWith ("", classes, []) $ stripTrailingNewlines $ concat contents -lhsCodeBlock :: GenParser Char ParserState Block +lhsCodeBlock :: Parser [Char] ParserState (F Blocks) lhsCodeBlock = do - failUnlessLHS - liftM (CodeBlock ("",["sourceCode","literate","haskell"],[])) - (lhsCodeBlockBird <|> lhsCodeBlockLaTeX) - <|> liftM (CodeBlock ("",["sourceCode","haskell"],[])) - lhsCodeBlockInverseBird + guardEnabled Ext_literate_haskell + (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> + (lhsCodeBlockBird <|> lhsCodeBlockLaTeX)) + <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> + lhsCodeBlockInverseBird) -lhsCodeBlockLaTeX :: GenParser Char ParserState String +lhsCodeBlockLaTeX :: Parser [Char] ParserState String lhsCodeBlockLaTeX = try $ do string "\\begin{code}" manyTill spaceChar newline @@ -464,13 +480,13 @@ lhsCodeBlockLaTeX = try $ do blanklines return $ stripTrailingNewlines contents -lhsCodeBlockBird :: GenParser Char ParserState String +lhsCodeBlockBird :: Parser [Char] ParserState String lhsCodeBlockBird = lhsCodeBlockBirdWith '>' -lhsCodeBlockInverseBird :: GenParser Char ParserState String +lhsCodeBlockInverseBird :: Parser [Char] ParserState String lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<' -lhsCodeBlockBirdWith :: Char -> GenParser Char ParserState String +lhsCodeBlockBirdWith :: Char -> Parser [Char] ParserState String lhsCodeBlockBirdWith c = try $ do pos <- getPosition when (sourceColumn pos /= 1) $ fail "Not in first column" @@ -482,25 +498,24 @@ lhsCodeBlockBirdWith c = try $ do blanklines return $ intercalate "\n" lns' -birdTrackLine :: Char -> GenParser Char st [Char] +birdTrackLine :: Char -> Parser [Char] st String birdTrackLine c = try $ do char c -- allow html tags on left margin: when (c == '<') $ notFollowedBy letter manyTill anyChar newline - -- -- block quotes -- -emailBlockQuoteStart :: GenParser Char ParserState Char +emailBlockQuoteStart :: Parser [Char] ParserState Char emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' >>~ optional (char ' ') -emailBlockQuote :: GenParser Char ParserState [[Char]] +emailBlockQuote :: Parser [Char] ParserState [String] emailBlockQuote = try $ do emailBlockQuoteStart - raw <- sepBy (many (nonEndline <|> + raw <- sepBy (many (nonEndline <|> (try (endline >> notFollowedBy emailBlockQuoteStart >> return '\n')))) (try (newline >> emailBlockQuoteStart)) @@ -508,51 +523,50 @@ emailBlockQuote = try $ do optional blanklines return raw -blockQuote :: GenParser Char ParserState Block -blockQuote = do +blockQuote :: Parser [Char] ParserState (F Blocks) +blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: contents <- parseFromString parseBlocks $ (intercalate "\n" raw) ++ "\n\n" - return $ BlockQuote contents - + return $ B.blockQuote <$> contents + -- -- list blocks -- -bulletListStart :: GenParser Char ParserState () +bulletListStart :: Parser [Char] ParserState () bulletListStart = try $ do optional newline -- if preceded by a Plain block in a list context skipNonindentSpaces - notFollowedBy' hrule -- because hrules start out just like lists + notFollowedBy' (() <$ hrule) -- because hrules start out just like lists satisfy isBulletListMarker spaceChar skipSpaces -anyOrderedListStart :: GenParser Char ParserState (Int, ListNumberStyle, ListNumberDelim) +anyOrderedListStart :: Parser [Char] ParserState (Int, ListNumberStyle, ListNumberDelim) anyOrderedListStart = try $ do optional newline -- if preceded by a Plain block in a list context skipNonindentSpaces notFollowedBy $ string "p." >> spaceChar >> digit -- page number - state <- getState - if stateStrict state - then do many1 digit - char '.' - spaceChar - return (1, DefaultStyle, DefaultDelim) - else do (num, style, delim) <- anyOrderedListMarker - -- if it could be an abbreviated first name, insist on more than one space - if delim == Period && (style == UpperAlpha || (style == UpperRoman && - num `elem` [1, 5, 10, 50, 100, 500, 1000])) - then char '\t' <|> (try $ char ' ' >> spaceChar) - else spaceChar - skipSpaces - return (num, style, delim) - -listStart :: GenParser Char ParserState () + (guardDisabled Ext_fancy_lists >> + do many1 digit + char '.' + spaceChar + return (1, DefaultStyle, DefaultDelim)) + <|> do (num, style, delim) <- anyOrderedListMarker + -- if it could be an abbreviated first name, insist on more than one space + if delim == Period && (style == UpperAlpha || (style == UpperRoman && + num `elem` [1, 5, 10, 50, 100, 500, 1000])) + then char '\t' <|> (try $ char ' ' >> spaceChar) + else spaceChar + skipSpaces + return (num, style, delim) + +listStart :: Parser [Char] ParserState () listStart = bulletListStart <|> (anyOrderedListStart >> return ()) -- parse a line of a list item (start = parser for beginning of list item) -listLine :: GenParser Char ParserState [Char] +listLine :: Parser [Char] ParserState String listLine = try $ do notFollowedBy blankline notFollowedBy' (do indentSpaces @@ -562,8 +576,8 @@ listLine = try $ do return $ concat chunks ++ "\n" -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: GenParser Char ParserState a - -> GenParser Char ParserState [Char] +rawListItem :: Parser [Char] ParserState a + -> Parser [Char] ParserState String rawListItem start = try $ do start first <- listLine @@ -571,17 +585,17 @@ rawListItem start = try $ do blanks <- many blankline return $ concat (first:rest) ++ blanks --- continuation of a list item - indented and separated by blankline +-- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation :: GenParser Char ParserState [Char] +listContinuation :: Parser [Char] ParserState String listContinuation = try $ do lookAhead indentSpaces result <- many1 listContinuationLine blanks <- many blankline return $ concat result ++ blanks -listContinuationLine :: GenParser Char ParserState [Char] +listContinuationLine :: Parser [Char] ParserState String listContinuationLine = try $ do notFollowedBy blankline notFollowedBy' listStart @@ -589,8 +603,8 @@ listContinuationLine = try $ do result <- manyTill anyChar newline return $ result ++ "\n" -listItem :: GenParser Char ParserState a - -> GenParser Char ParserState [Block] +listItem :: Parser [Char] ParserState a + -> Parser [Char] ParserState (F Blocks) listItem start = try $ do first <- rawListItem start continuations <- many listContinuation @@ -606,38 +620,59 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return contents -orderedList :: GenParser Char ParserState Block +orderedList :: Parser [Char] ParserState (F Blocks) orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart - items <- many1 $ listItem $ try $ - do optional newline -- if preceded by a Plain block in a list context - skipNonindentSpaces - orderedListMarker style delim - return $ OrderedList (start, style, delim) $ compactify items - -bulletList :: GenParser Char ParserState Block -bulletList = - many1 (listItem bulletListStart) >>= return . BulletList . compactify + unless ((style == DefaultStyle || style == Decimal || style == Example) && + (delim == DefaultDelim || delim == Period)) $ + guardEnabled Ext_fancy_lists + when (style == Example) $ guardEnabled Ext_example_lists + items <- fmap sequence $ many1 $ listItem + ( try $ do + optional newline -- if preceded by Plain block in a list + skipNonindentSpaces + orderedListMarker style delim ) + start' <- option 1 $ guardEnabled Ext_startnum >> return start + return $ B.orderedListWith (start', style, delim) <$> fmap compactify items + +-- | Change final list item from @Para@ to @Plain@ if the list contains +-- no other @Para@ blocks. (From Shared, modified for Blocks rather than [Block].) +compactify :: [Blocks] -- ^ List of list items (each a list of blocks) + -> [Blocks] +compactify [] = [] +compactify items = + let (others, final) = (init items, last items) + in case reverse (B.toList final) of + (Para a:xs) -> case [Para x | Para x <- concatMap B.toList items] of + -- if this is only Para, change to Plain + [_] -> others ++ [B.fromList (reverse $ Plain a : xs)] + _ -> items + _ -> items + +bulletList :: Parser [Char] ParserState (F Blocks) +bulletList = do + items <- fmap sequence $ many1 $ listItem bulletListStart + return $ B.bulletList <$> fmap compactify items -- definition lists -defListMarker :: GenParser Char ParserState () +defListMarker :: Parser [Char] ParserState () defListMarker = do sps <- nonindentSpaces char ':' <|> char '~' - st <- getState - let tabStop = stateTabStop st + tabStop <- getOption readerTabStop let remaining = tabStop - (length sps + 1) if remaining > 0 then count remaining (char ' ') <|> string "\t" - else pzero + else mzero return () -definitionListItem :: GenParser Char ParserState ([Inline], [[Block]]) +definitionListItem :: Parser [Char] ParserState (F (Inlines, [Blocks])) definitionListItem = try $ do + guardEnabled Ext_definition_lists -- first, see if this has any chance of being a definition list: lookAhead (anyLine >> optional blankline >> defListMarker) - term <- manyTill inline newline + term <- trimInlinesF . mconcat <$> manyTill inline newline optional blankline raw <- many1 defRawBlock state <- getState @@ -645,9 +680,9 @@ definitionListItem = try $ do -- parse the extracted block, which may contain various block elements: contents <- mapM (parseFromString parseBlocks) raw updateState (\st -> st {stateParserContext = oldContext}) - return ((normalizeSpaces term), contents) + return $ liftM2 (,) term (sequence contents) -defRawBlock :: GenParser Char ParserState [Char] +defRawBlock :: Parser [Char] ParserState String defRawBlock = try $ do defListMarker firstline <- anyLine @@ -659,119 +694,149 @@ defRawBlock = try $ do return $ unlines lns ++ trl return $ firstline ++ "\n" ++ unlines rawlines ++ trailing ++ cont -definitionList :: GenParser Char ParserState Block +definitionList :: Parser [Char] ParserState (F Blocks) definitionList = do - items <- many1 definitionListItem - -- "compactify" the definition list: - let defs = map snd items - let defBlocks = reverse $ concat $ concat defs - let isPara (Para _) = True + items <- fmap sequence $ many1 definitionListItem + return $ B.definitionList <$> fmap compactifyDL items + +compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])] +compactifyDL items = + let defs = concatMap snd items + defBlocks = reverse $ concatMap B.toList defs + isPara (Para _) = True isPara _ = False - let items' = case take 1 defBlocks of - [Para x] -> if not $ any isPara (drop 1 defBlocks) - then let (t,ds) = last items - lastDef = last ds - ds' = init ds ++ - [init lastDef ++ [Plain x]] - in init items ++ [(t, ds')] - else items - _ -> items - return $ DefinitionList items' + in case defBlocks of + (Para x:_) -> if not $ any isPara (drop 1 defBlocks) + then let (t,ds) = last items + lastDef = B.toList $ last ds + ds' = init ds ++ + [B.fromList $ init lastDef ++ [Plain x]] + in init items ++ [(t, ds')] + else items + _ -> items -- -- paragraph block -- +{- isHtmlOrBlank :: Inline -> Bool isHtmlOrBlank (RawInline "html" _) = True isHtmlOrBlank (Space) = True isHtmlOrBlank (LineBreak) = True isHtmlOrBlank _ = False +-} -para :: GenParser Char ParserState Block -para = try $ do - result <- liftM normalizeSpaces $ many1 inline - guard $ not . all isHtmlOrBlank $ result - option (Plain result) $ try $ do +para :: Parser [Char] ParserState (F Blocks) +para = try $ do + result <- trimInlinesF . mconcat <$> many1 inline + -- TODO remove this if not really needed? and remove isHtmlOrBlank + -- guard $ not $ F.all isHtmlOrBlank result + option (B.plain <$> result) $ try $ do newline - blanklines <|> - (getState >>= guard . stateStrict >> - lookAhead (blockQuote <|> header) >> return "") - return $ Para result + (blanklines >> return mempty) + <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote) + <|> (guardDisabled Ext_blank_before_header >> lookAhead header) + return $ B.para <$> result -plain :: GenParser Char ParserState Block -plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces +plain :: Parser [Char] ParserState (F Blocks) +plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline <* spaces --- +-- -- raw html -- -htmlElement :: GenParser Char ParserState [Char] +htmlElement :: Parser [Char] ParserState String htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) -htmlBlock :: GenParser Char ParserState Block -htmlBlock = try $ do - failUnlessBeginningOfLine +htmlBlock :: Parser [Char] ParserState (F Blocks) +htmlBlock = do + guardEnabled Ext_raw_html + res <- (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks) + <|> htmlBlock' + return $ return $ B.rawBlock "html" res + +htmlBlock' :: Parser [Char] ParserState String +htmlBlock' = try $ do first <- htmlElement finalSpace <- many spaceChar finalNewlines <- many newline - return $ RawBlock "html" $ first ++ finalSpace ++ finalNewlines + return $ first ++ finalSpace ++ finalNewlines -strictHtmlBlock :: GenParser Char ParserState [Char] -strictHtmlBlock = do - failUnlessBeginningOfLine - htmlInBalanced (not . isInlineTag) +strictHtmlBlock :: Parser [Char] ParserState String +strictHtmlBlock = htmlInBalanced (not . isInlineTag) -rawVerbatimBlock :: GenParser Char ParserState String +rawVerbatimBlock :: Parser [Char] ParserState String rawVerbatimBlock = try $ do (TagOpen tag _, open) <- htmlTag (tagOpen (\t -> - t == "pre" || t == "style" || t == "script") - (const True)) + t == "pre" || t == "style" || t == "script") + (const True)) contents <- manyTill anyChar (htmlTag (~== TagClose tag)) return $ open ++ contents ++ renderTags [TagClose tag] -rawTeXBlock :: GenParser Char ParserState Block +rawTeXBlock :: Parser [Char] ParserState (F Blocks) rawTeXBlock = do - failIfStrict - result <- liftM (RawBlock "latex") rawLaTeXBlock - <|> liftM (RawBlock "context") rawConTeXtEnvironment + guardEnabled Ext_raw_tex + result <- (B.rawBlock "latex" <$> rawLaTeXBlock) + <|> (B.rawBlock "context" <$> rawConTeXtEnvironment) spaces - return result + return $ return result -rawHtmlBlocks :: GenParser Char ParserState Block +rawHtmlBlocks :: Parser [Char] ParserState String rawHtmlBlocks = do - htmlBlocks <- many1 $ do blk <- rawVerbatimBlock <|> - liftM snd (htmlTag isBlockTag) - sps <- do sp1 <- many spaceChar - sp2 <- option "" (blankline >> return "\n") - sp3 <- many spaceChar - sp4 <- option "" blanklines - return $ sp1 ++ sp2 ++ sp3 ++ sp4 - -- note: we want raw html to be able to - -- precede a code block, when separated - -- by a blank line - return $ blk ++ sps + htmlBlocks <- many1 $ try $ do + s <- rawVerbatimBlock <|> try ( + do (t,raw) <- htmlTag isBlockTag + exts <- getOption readerExtensions + -- if open tag, need markdown="1" if + -- markdown_attributes extension is set + case t of + TagOpen _ as + | Ext_markdown_attribute `Set.member` + exts -> + if "markdown" `notElem` + map fst as + then mzero + else return $ + stripMarkdownAttribute raw + | otherwise -> return raw + _ -> return raw ) + sps <- do sp1 <- many spaceChar + sp2 <- option "" (blankline >> return "\n") + sp3 <- many spaceChar + sp4 <- option "" blanklines + return $ sp1 ++ sp2 ++ sp3 ++ sp4 + -- note: we want raw html to be able to + -- precede a code block, when separated + -- by a blank line + return $ s ++ sps let combined = concat htmlBlocks - let combined' = if last combined == '\n' then init combined else combined - return $ RawBlock "html" combined' + return $ if last combined == '\n' then init combined else combined + +-- remove markdown="1" attribute +stripMarkdownAttribute :: String -> String +stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s + where filterAttrib (TagOpen t as) = TagOpen t + [(k,v) | (k,v) <- as, k /= "markdown"] + filterAttrib x = x -- -- Tables --- +-- -- Parse a dashed line with optional trailing spaces; return its length -- and the length including trailing space. -dashedLine :: Char - -> GenParser Char st (Int, Int) +dashedLine :: Char + -> Parser [Char] st (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar return $ (length dashes, length $ dashes ++ sp) --- Parse a table header with dashed lines of '-' preceded by +-- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. -simpleTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) +simpleTableHeader :: Bool -- ^ Headerless table + -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) simpleTableHeader headless = try $ do rawContent <- if headless then return "" @@ -784,84 +849,104 @@ simpleTableHeader headless = try $ do -- If no header, calculate alignment on basis of first row of text rawHeads <- liftM (tail . splitStringByIndices (init indices)) $ if headless - then lookAhead anyLine + then lookAhead anyLine else return rawContent let aligns = zipWith alignType (map (\a -> [a]) rawHeads) lengths let rawHeads' = if headless then replicate (length dashes) "" - else rawHeads - heads <- mapM (parseFromString (many plain)) $ - map removeLeadingTrailingSpace rawHeads' + else rawHeads + heads <- fmap sequence + $ mapM (parseFromString (mconcat <$> many plain)) + $ map removeLeadingTrailingSpace rawHeads' return (heads, aligns, indices) +-- Returns an alignment type for a table, based on a list of strings +-- (the rows of the column header) and a number (the length of the +-- dashed line under the rows. +alignType :: [String] + -> Int + -> Alignment +alignType [] _ = AlignDefault +alignType strLst len = + let nonempties = filter (not . null) $ map removeTrailingSpace strLst + (leftSpace, rightSpace) = + case sortBy (comparing length) nonempties of + (x:_) -> (head x `elem` " \t", length x < len) + [] -> (False, False) + in case (leftSpace, rightSpace) of + (True, False) -> AlignRight + (False, True) -> AlignLeft + (True, True) -> AlignCenter + (False, False) -> AlignDefault + -- Parse a table footer - dashed lines followed by blank line. -tableFooter :: GenParser Char ParserState [Char] +tableFooter :: Parser [Char] ParserState String tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines -- Parse a table separator - dashed line. -tableSep :: GenParser Char ParserState Char +tableSep :: Parser [Char] ParserState Char tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n' -- Parse a raw line and split it into chunks by indices. rawTableLine :: [Int] - -> GenParser Char ParserState [String] + -> Parser [Char] ParserState [String] rawTableLine indices = do notFollowedBy' (blanklines <|> tableFooter) line <- many1Till anyChar newline - return $ map removeLeadingTrailingSpace $ tail $ + return $ map removeLeadingTrailingSpace $ tail $ splitStringByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). tableLine :: [Int] - -> GenParser Char ParserState [[Block]] -tableLine indices = rawTableLine indices >>= mapM (parseFromString (many plain)) + -> Parser [Char] ParserState (F [Blocks]) +tableLine indices = rawTableLine indices >>= + fmap sequence . mapM (parseFromString (mconcat <$> many plain)) -- Parse a multiline table row and return a list of blocks (columns). multilineRow :: [Int] - -> GenParser Char ParserState [[Block]] + -> Parser [Char] ParserState (F [Blocks]) multilineRow indices = do colLines <- many1 (rawTableLine indices) let cols = map unlines $ transpose colLines - mapM (parseFromString (many plain)) cols + fmap sequence $ mapM (parseFromString (mconcat <$> many plain)) cols -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. -tableCaption :: GenParser Char ParserState [Inline] +tableCaption :: Parser [Char] ParserState (F Inlines) tableCaption = try $ do + guardEnabled Ext_table_captions skipNonindentSpaces string ":" <|> string "Table:" - result <- many1 inline - blanklines - return $ normalizeSpaces result + trimInlinesF . mconcat <$> many1 inline <* blanklines -- Parse a simple table with '---' header and one line per row. simpleTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block + -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) simpleTable headless = do - Table c a _w h l <- tableWith (simpleTableHeader headless) tableLine + (aligns, _widths, heads', lines') <- + tableWith (simpleTableHeader headless) tableLine (return ()) (if headless then tableFooter else tableFooter <|> blanklines) - tableCaption -- Simple tables get 0s for relative column widths (i.e., use default) - return $ Table c a (replicate (length a) 0) h l + return (aligns, replicate (length aligns) 0, heads', lines') -- Parse a multiline table: starts with row of '-' on top, then header -- (which may be multiline), then the rows, -- which may be multiline, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). multilineTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block + -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) multilineTable headless = - tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter tableCaption + tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter multilineTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) + -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) multilineTableHeader headless = try $ do if headless then return '\n' else tableSep >>~ notFollowedBy blankline rawContent <- if headless - then return $ repeat "" + then return $ repeat "" else many1 (notFollowedBy tableSep >> many1Till anyChar newline) initSp <- nonindentSpaces @@ -872,54 +957,206 @@ multilineTableHeader headless = try $ do rawHeadsList <- if headless then liftM (map (:[]) . tail . splitStringByIndices (init indices)) $ lookAhead anyLine - else return $ transpose $ map + else return $ transpose $ map (\ln -> tail $ splitStringByIndices (init indices) ln) rawContent let aligns = zipWith alignType rawHeadsList lengths let rawHeads = if headless then replicate (length dashes) "" else map (intercalate " ") rawHeadsList - heads <- mapM (parseFromString (many plain)) $ + heads <- fmap sequence $ + mapM (parseFromString (mconcat <$> many plain)) $ map removeLeadingTrailingSpace rawHeads return (heads, aligns, indices) --- Returns an alignment type for a table, based on a list of strings --- (the rows of the column header) and a number (the length of the --- dashed line under the rows. -alignType :: [String] - -> Int - -> Alignment -alignType [] _ = AlignDefault -alignType strLst len = - let nonempties = filter (not . null) $ map removeTrailingSpace strLst - (leftSpace, rightSpace) = - case sortBy (comparing length) nonempties of - (x:_) -> (head x `elem` " \t", length x < len) - [] -> (False, False) - in case (leftSpace, rightSpace) of - (True, False) -> AlignRight - (False, True) -> AlignLeft - (True, True) -> AlignCenter - (False, False) -> AlignDefault - +-- Parse a grid table: starts with row of '-' on top, then header +-- (which may be grid), then the rows, +-- which may be grid, separated by blank lines, and +-- ending with a footer (dashed line followed by blank line). gridTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block -gridTable = gridTableWith block tableCaption + -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) +gridTable headless = + tableWith (gridTableHeader headless) gridTableRow + (gridTableSep '-') gridTableFooter + +gridTableSplitLine :: [Int] -> String -> [String] +gridTableSplitLine indices line = map removeFinalBar $ tail $ + splitStringByIndices (init indices) $ removeTrailingSpace line + +gridPart :: Char -> Parser [Char] st (Int, Int) +gridPart ch = do + dashes <- many1 (char ch) + char '+' + return (length dashes, length dashes + 1) -table :: GenParser Char ParserState Block -table = multilineTable False <|> simpleTable True <|> - simpleTable False <|> multilineTable True <|> - gridTable False <|> gridTable True <?> "table" +gridDashedLines :: Char -> Parser [Char] st [(Int,Int)] +gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline + +removeFinalBar :: String -> String +removeFinalBar = + reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse + +-- | Separator between rows of grid table. +gridTableSep :: Char -> Parser [Char] ParserState Char +gridTableSep ch = try $ gridDashedLines ch >> return '\n' + +-- | Parse header for a grid table. +gridTableHeader :: Bool -- ^ Headerless table + -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) +gridTableHeader headless = try $ do + optional blanklines + dashes <- gridDashedLines '-' + rawContent <- if headless + then return $ repeat "" + else many1 + (notFollowedBy (gridTableSep '=') >> char '|' >> + many1Till anyChar newline) + if headless + then return () + else gridTableSep '=' >> return () + let lines' = map snd dashes + let indices = scanl (+) 0 lines' + let aligns = replicate (length lines') AlignDefault + -- RST does not have a notion of alignments + let rawHeads = if headless + then replicate (length dashes) "" + else map (intercalate " ") $ transpose + $ map (gridTableSplitLine indices) rawContent + heads <- fmap sequence $ mapM (parseFromString block) $ + map removeLeadingTrailingSpace rawHeads + return (heads, aligns, indices) --- +gridTableRawLine :: [Int] -> Parser [Char] ParserState [String] +gridTableRawLine indices = do + char '|' + line <- many1Till anyChar newline + return (gridTableSplitLine indices line) + +-- | Parse row of grid table. +gridTableRow :: [Int] + -> Parser [Char] ParserState (F [Blocks]) +gridTableRow indices = do + colLines <- many1 (gridTableRawLine indices) + let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ + transpose colLines + fmap compactify <$> fmap sequence (mapM (parseFromString block) cols) + +removeOneLeadingSpace :: [String] -> [String] +removeOneLeadingSpace xs = + if all startsWithSpace xs + then map (drop 1) xs + else xs + where startsWithSpace "" = True + startsWithSpace (y:_) = y == ' ' + +-- | Parse footer for a grid table. +gridTableFooter :: Parser [Char] ParserState [Char] +gridTableFooter = blanklines + +pipeTable :: Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) +pipeTable = try $ do + let pipeBreak = nonindentSpaces *> optional (char '|') *> + pipeTableHeaderPart `sepBy1` sepPipe <* + optional (char '|') <* blankline + (heads,aligns) <- try ( pipeBreak >>= \als -> + return (return $ replicate (length als) mempty, als)) + <|> ( pipeTableRow >>= \row -> pipeBreak >>= \als -> + + return (row, als) ) + lines' <- sequence <$> many1 pipeTableRow + blanklines + let widths = replicate (length aligns) 0.0 + return $ (aligns, widths, heads, lines') + +sepPipe :: Parser [Char] ParserState () +sepPipe = try $ do + char '|' <|> char '+' + notFollowedBy blankline + +-- parse a row, also returning probable alignments for org-table cells +pipeTableRow :: Parser [Char] ParserState (F [Blocks]) +pipeTableRow = do + nonindentSpaces + optional (char '|') + let cell = mconcat <$> + many (notFollowedBy (blankline <|> char '|') >> inline) + first <- cell + sepPipe + rest <- cell `sepBy1` sepPipe + optional (char '|') + blankline + let cells = sequence (first:rest) + return $ do + cells' <- cells + return $ map + (\ils -> + case trimInlines ils of + ils' | B.isNull ils' -> mempty + | otherwise -> B.plain $ ils') cells' + +pipeTableHeaderPart :: Parser [Char] st Alignment +pipeTableHeaderPart = do + left <- optionMaybe (char ':') + many1 (char '-') + right <- optionMaybe (char ':') + return $ + case (left,right) of + (Nothing,Nothing) -> AlignDefault + (Just _,Nothing) -> AlignLeft + (Nothing,Just _) -> AlignRight + (Just _,Just _) -> AlignCenter + +-- Succeed only if current line contains a pipe. +scanForPipe :: Parser [Char] st () +scanForPipe = lookAhead (manyTill (satisfy (/='\n')) (char '|')) >> return () + +-- | Parse a table using 'headerParser', 'rowParser', +-- 'lineParser', and 'footerParser'. Variant of the version in +-- Text.Pandoc.Parsing. +tableWith :: Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) + -> ([Int] -> Parser [Char] ParserState (F [Blocks])) + -> Parser [Char] ParserState sep + -> Parser [Char] ParserState end + -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) +tableWith headerParser rowParser lineParser footerParser = try $ do + (heads, aligns, indices) <- headerParser + lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser + footerParser + numColumns <- getOption readerColumns + let widths = if (indices == []) + then replicate (length aligns) 0.0 + else widthsFromIndices numColumns indices + return $ (aligns, widths, heads, lines') + +table :: Parser [Char] ParserState (F Blocks) +table = try $ do + frontCaption <- option Nothing (Just <$> tableCaption) + (aligns, widths, heads, lns) <- + try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|> + try (guardEnabled Ext_multiline_tables >> + multilineTable False) <|> + try (guardEnabled Ext_simple_tables >> + (simpleTable True <|> simpleTable False)) <|> + try (guardEnabled Ext_multiline_tables >> + multilineTable True) <|> + try (guardEnabled Ext_grid_tables >> + (gridTable False <|> gridTable True)) <?> "table" + optional blanklines + caption <- case frontCaption of + Nothing -> option (return mempty) tableCaption + Just c -> return c + return $ do + caption' <- caption + heads' <- heads + lns' <- lns + return $ B.table caption' (zip aligns widths) heads' lns' + +-- -- inline -- -inline :: GenParser Char ParserState Inline -inline = choice inlineParsers <?> "inline" - -inlineParsers :: [GenParser Char ParserState Inline] -inlineParsers = [ whitespace +inline :: Parser [Char] ParserState (F Inlines) +inline = choice [ whitespace , str , endline , code @@ -927,8 +1164,8 @@ inlineParsers = [ whitespace , strong , emph , note - , link , cite + , link , image , math , strikeout @@ -940,115 +1177,127 @@ inlineParsers = [ whitespace , escapedChar , rawLaTeXInline' , exampleRef - , smartPunctuation inline - , charRef + , smart + , return . B.singleton <$> charRef , symbol - , ltSign ] + , ltSign + ] <?> "inline" -escapedChar' :: GenParser Char ParserState Char +escapedChar' :: Parser [Char] ParserState Char escapedChar' = try $ do char '\\' - state <- getState - if stateStrict state - then oneOf "\\`*_{}[]()>#+-.!~" - else satisfy (not . isAlphaNum) + (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum)) + <|> oneOf "\\`*_{}[]()>#+-.!~" -escapedChar :: GenParser Char ParserState Inline +escapedChar :: Parser [Char] ParserState (F Inlines) escapedChar = do result <- escapedChar' - return $ case result of - ' ' -> Str "\160" -- "\ " is a nonbreaking space - '\n' -> LineBreak -- "\[newline]" is a linebreak - _ -> Str [result] + case result of + ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space + '\n' -> guardEnabled Ext_escaped_line_breaks >> + return (return B.linebreak) -- "\[newline]" is a linebreak + _ -> return $ return $ B.str [result] -ltSign :: GenParser Char ParserState Inline +ltSign :: Parser [Char] ParserState (F Inlines) ltSign = do - st <- getState - if stateStrict st - then char '<' - else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html - return $ Str ['<'] + guardDisabled Ext_raw_html + <|> guardDisabled Ext_markdown_in_html_blocks + <|> (notFollowedBy' rawHtmlBlocks >> return ()) + char '<' + return $ return $ B.str "<" -exampleRef :: GenParser Char ParserState Inline +exampleRef :: Parser [Char] ParserState (F Inlines) exampleRef = try $ do + guardEnabled Ext_example_lists char '@' lab <- many1 (alphaNum <|> oneOf "-_") - -- We just return a Str. These are replaced with numbers - -- later. See the end of parseMarkdown. - return $ Str $ '@' : lab - -symbol :: GenParser Char ParserState Inline -symbol = do + return $ do + st <- askF + return $ case M.lookup lab (stateExamples st) of + Just n -> B.str (show n) + Nothing -> B.str ('@':lab) + +symbol :: Parser [Char] ParserState (F Inlines) +symbol = do result <- noneOf "<\\\n\t " <|> try (do lookAhead $ char '\\' - notFollowedBy' rawTeXBlock + notFollowedBy' (() <$ rawTeXBlock) char '\\') - return $ Str [result] + return $ return $ B.str [result] -- parses inline code, between n `s and n `s -code :: GenParser Char ParserState Inline -code = try $ do +code :: Parser [Char] ParserState (F Inlines) +code = try $ do starts <- many1 (char '`') skipSpaces result <- many1Till (many1 (noneOf "`\n") <|> many1 (char '`') <|> (char '\n' >> notFollowedBy' blankline >> return " ")) - (try (skipSpaces >> count (length starts) (char '`') >> + (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) - attr <- option ([],[],[]) (try $ optional whitespace >> attributes) - return $ Code attr $ removeLeadingTrailingSpace $ concat result - -mathWord :: GenParser Char st [Char] -mathWord = liftM concat $ many1 mathChunk - -mathChunk :: GenParser Char st [Char] -mathChunk = do char '\\' - c <- anyChar - return ['\\',c] - <|> many1 (satisfy $ \c -> not (isBlank c || c == '\\' || c == '$')) - -math :: GenParser Char ParserState Inline -math = (mathDisplay >>= applyMacros' >>= return . Math DisplayMath) - <|> (mathInline >>= applyMacros' >>= return . Math InlineMath) - -mathDisplay :: GenParser Char ParserState String -mathDisplay = try $ do - failIfStrict - string "$$" - many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string "$$") - -mathInline :: GenParser Char ParserState String -mathInline = try $ do - failIfStrict - char '$' + attr <- option ([],[],[]) (try $ guardEnabled Ext_inline_code_attributes >> + optional whitespace >> attributes) + return $ return $ B.codeWith attr $ removeLeadingTrailingSpace $ concat result + +math :: Parser [Char] ParserState (F Inlines) +math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) + <|> (return . B.math <$> (mathInline >>= applyMacros')) + +mathDisplay :: Parser [Char] ParserState String +mathDisplay = + (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathDisplayWith "\\[" "\\]") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathDisplayWith "\\\\[" "\\\\]") + +mathDisplayWith :: String -> String -> Parser [Char] ParserState String +mathDisplayWith op cl = try $ do + string op + many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl) + +mathInline :: Parser [Char] ParserState String +mathInline = + (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathInlineWith "\\(" "\\)") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathInlineWith "\\\\(" "\\\\)") + +mathInlineWith :: String -> String -> Parser [Char] ParserState String +mathInlineWith op cl = try $ do + string op notFollowedBy space - words' <- sepBy1 mathWord (many1 (spaceChar <|> (newline >>~ notFollowedBy' blankline))) - char '$' - notFollowedBy digit - return $ intercalate " " words' + words' <- many1Till (count 1 (noneOf "\n\\") + <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) + <|> count 1 newline <* notFollowedBy' blankline + *> return " ") + (try $ string cl) + notFollowedBy digit -- to prevent capture of $5 + return $ concat words' -- to avoid performance problems, treat 4 or more _ or * or ~ or ^ in a row -- as a literal rather than attempting to parse for emph/strong/strikeout/super/sub -fours :: GenParser Char st Inline +fours :: Parser [Char] st (F Inlines) fours = try $ do x <- char '*' <|> char '_' <|> char '~' <|> char '^' count 2 $ satisfy (==x) rest <- many1 (satisfy (==x)) - return $ Str (x:x:x:rest) + return $ return $ B.str (x:x:x:rest) -- | Parses a list of inlines between start and end delimiters. inlinesBetween :: (Show b) - => GenParser Char ParserState a - -> GenParser Char ParserState b - -> GenParser Char ParserState [Inline] + => Parser [Char] ParserState a + -> Parser [Char] ParserState b + -> Parser [Char] ParserState (F Inlines) inlinesBetween start end = - normalizeSpaces `liftM` try (start >> many1Till inner end) - where inner = innerSpace <|> (notFollowedBy' whitespace >> inline) + (trimInlinesF . mconcat) <$> try (start >> many1Till inner end) + where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace >>~ notFollowedBy' end -- This is used to prevent exponential blowups for things like: -- a**a*a**a*a**a*a**a*a**a*a**a*a** -nested :: GenParser Char ParserState a - -> GenParser Char ParserState a +nested :: Parser [Char] ParserState a + -> Parser [Char] ParserState a nested p = do nestlevel <- stateMaxNestingLevel `fmap` getState guard $ nestlevel > 0 @@ -1057,54 +1306,57 @@ nested p = do updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } return res -emph :: GenParser Char ParserState Inline -emph = Emph `fmap` nested +emph :: Parser [Char] ParserState (F Inlines) +emph = fmap B.emph <$> nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = char '*' >> lookAhead nonspaceChar - starEnd = notFollowedBy' strong >> char '*' + starEnd = notFollowedBy' (() <$ strong) >> char '*' ulStart = char '_' >> lookAhead nonspaceChar - ulEnd = notFollowedBy' strong >> char '_' + ulEnd = notFollowedBy' (() <$ strong) >> char '_' -strong :: GenParser Char ParserState Inline -strong = Strong `liftM` nested +strong :: Parser [Char] ParserState (F Inlines) +strong = fmap B.strong <$> nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = string "**" >> lookAhead nonspaceChar starEnd = try $ string "**" ulStart = string "__" >> lookAhead nonspaceChar ulEnd = try $ string "__" -strikeout :: GenParser Char ParserState Inline -strikeout = Strikeout `liftM` - (failIfStrict >> inlinesBetween strikeStart strikeEnd) +strikeout :: Parser [Char] ParserState (F Inlines) +strikeout = fmap B.strikeout <$> + (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd) where strikeStart = string "~~" >> lookAhead nonspaceChar >> notFollowedBy (char '~') strikeEnd = try $ string "~~" -superscript :: GenParser Char ParserState Inline -superscript = failIfStrict >> enclosed (char '^') (char '^') - (notFollowedBy spaceChar >> inline) >>= -- may not contain Space - return . Superscript +superscript :: Parser [Char] ParserState (F Inlines) +superscript = fmap B.superscript <$> try (do + guardEnabled Ext_superscript + char '^' + mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^')) -subscript :: GenParser Char ParserState Inline -subscript = failIfStrict >> enclosed (char '~') (char '~') - (notFollowedBy spaceChar >> inline) >>= -- may not contain Space - return . Subscript +subscript :: Parser [Char] ParserState (F Inlines) +subscript = fmap B.subscript <$> try (do + guardEnabled Ext_subscript + char '~' + mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~')) -whitespace :: GenParser Char ParserState Inline -whitespace = spaceChar >> - ( (spaceChar >> skipMany spaceChar >> option Space (endline >> return LineBreak)) - <|> (skipMany spaceChar >> return Space) ) <?> "whitespace" +whitespace :: Parser [Char] ParserState (F Inlines) +whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" + where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) + regsp = skipMany spaceChar >> return B.space -nonEndline :: GenParser Char st Char +nonEndline :: Parser [Char] st Char nonEndline = satisfy (/='\n') -str :: GenParser Char ParserState Inline +str :: Parser [Char] ParserState (F Inlines) str = do - smart <- stateSmart `fmap` getState + isSmart <- readerSmart . stateOptions <$> getState a <- alphaNum as <- many $ alphaNum - <|> (try $ char '_' >>~ lookAhead alphaNum) - <|> if smart + <|> (guardEnabled Ext_intraword_underscores >> + try (char '_' >>~ lookAhead alphaNum)) + <|> if isSmart then (try $ satisfy (\c -> c == '\'' || c == '\x2019') >> lookAhead alphaNum >> return '\x2019') -- for things like l'aide @@ -1113,15 +1365,16 @@ str = do updateState $ \s -> s{ stateLastStrPos = Just pos } let result = a:as let spacesToNbr = map (\c -> if c == ' ' then '\160' else c) - if smart + if isSmart then case likelyAbbrev result of - [] -> return $ Str result + [] -> return $ return $ B.str result xs -> choice (map (\x -> try (string x >> oneOf " \n" >> lookAhead alphaNum >> - return (Str $ result ++ spacesToNbr x ++ "\160"))) xs) - <|> (return $ Str result) - else return $ Str result + return (return $ B.str + $ result ++ spacesToNbr x ++ "\160"))) xs) + <|> (return $ return $ B.str result) + else return $ return $ B.str result -- | if the string matches the beginning of an abbreviation (before -- the first period, return strings that would finish the abbreviation. @@ -1136,39 +1389,38 @@ likelyAbbrev x = in map snd $ filter (\(y,_) -> y == x) abbrPairs -- an endline character that can be treated as a space, not a structural break -endline :: GenParser Char ParserState Inline +endline :: Parser [Char] ParserState (F Inlines) endline = try $ do newline notFollowedBy blankline - st <- getState - when (stateStrict st) $ do - notFollowedBy emailBlockQuoteStart - notFollowedBy (char '#') -- atx header + guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart + guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header -- parse potential list-starts differently if in a list: + st <- getState when (stateParserContext st == ListItemState) $ do notFollowedBy' bulletListStart notFollowedBy' anyOrderedListStart - return Space + (guardEnabled Ext_hard_line_breaks >> return (return B.linebreak)) + <|> (return $ return B.space) -- -- links -- -- a reference label for a link -reference :: GenParser Char ParserState [Inline] +reference :: Parser [Char] ParserState (F Inlines, String) reference = do notFollowedBy' (string "[^") -- footnote reference - result <- inlinesInBalancedBrackets inline - return $ normalizeSpaces result + withRaw $ trimInlinesF <$> inlinesInBalancedBrackets -- source for a link, with optional title -source :: GenParser Char ParserState (String, [Char]) +source :: Parser [Char] ParserState (String, String) source = (try $ charsInBalanced '(' ')' litChar >>= parseFromString source') <|> -- the following is needed for cases like: [ref](/url(a). (enclosed (char '(') (char ')') litChar >>= parseFromString source') -- auxiliary function for source -source' :: GenParser Char ParserState (String, [Char]) +source' :: Parser [Char] ParserState (String, String) source' = do skipSpaces let nl = char '\n' >>~ notFollowedBy blankline @@ -1186,7 +1438,7 @@ source' = do eof return (escapeURI $ removeTrailingSpace src, tit) -linkTitle :: GenParser Char ParserState String +linkTitle :: Parser [Char] ParserState String linkTitle = try $ do (many1 spaceChar >> option '\n' newline) <|> newline skipSpaces @@ -1194,78 +1446,88 @@ linkTitle = try $ do tit <- manyTill litChar (try (char delim >> skipSpaces >> eof)) return $ fromEntities tit -link :: GenParser Char ParserState Inline +link :: Parser [Char] ParserState (F Inlines) link = try $ do - lab <- reference - (src, tit) <- source <|> referenceLink lab - return $ Link (delinkify lab) (src, tit) - -delinkify :: [Inline] -> [Inline] -delinkify = bottomUp $ concatMap go - where go (Link lab _) = lab - go x = [x] + st <- getState + guard $ stateAllowLinks st + setState $ st{ stateAllowLinks = False } + (lab,raw) <- reference + setState $ st{ stateAllowLinks = True } + regLink B.link lab <|> referenceLink B.link (lab,raw) + +regLink :: (String -> String -> Inlines -> Inlines) + -> F Inlines -> Parser [Char] ParserState (F Inlines) +regLink constructor lab = try $ do + (src, tit) <- source + return $ constructor src tit <$> lab -- a link like [this][ref] or [this][] or [this] -referenceLink :: [Inline] - -> GenParser Char ParserState (String, [Char]) -referenceLink lab = do - ref <- option [] (try (optional (char ' ') >> - optional (newline >> skipSpaces) >> reference)) - let ref' = if null ref then lab else ref - state <- getState - case lookupKeySrc (stateKeys state) (toKey ref') of - Nothing -> fail "no corresponding key" - Just target -> return target - -autoLink :: GenParser Char ParserState Inline +referenceLink :: (String -> String -> Inlines -> Inlines) + -> (F Inlines, String) -> Parser [Char] ParserState (F Inlines) +referenceLink constructor (lab, raw) = do + raw' <- try (optional (char ' ') >> + optional (newline >> skipSpaces) >> + (snd <$> reference)) <|> return "" + let key = toKey $ if raw' == "[]" || raw' == "" then raw else raw' + let dropRB (']':xs) = xs + dropRB xs = xs + let dropLB ('[':xs) = xs + dropLB xs = xs + let dropBrackets = reverse . dropRB . reverse . dropLB + fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw + return $ do + keys <- asksF stateKeys + case M.lookup key keys of + Nothing -> (\x -> B.str "[" <> x <> B.str "]" <> B.str raw') <$> fallback + Just (src,tit) -> constructor src tit <$> lab + +autoLink :: Parser [Char] ParserState (F Inlines) autoLink = try $ do char '<' (orig, src) <- uri <|> emailAddress char '>' - st <- getState - return $ if stateStrict st - then Link [Str orig] (src, "") - else Link [Code ("",["url"],[]) orig] (src, "") + (guardEnabled Ext_autolink_code_spans >> + return (return $ B.link src "" (B.codeWith ("",["url"],[]) orig))) + <|> return (return $ B.link src "" (B.str orig)) -image :: GenParser Char ParserState Inline +image :: Parser [Char] ParserState (F Inlines) image = try $ do char '!' - lab <- reference - (src, tit) <- source <|> referenceLink lab - return $ Image lab (src,tit) + (lab,raw) <- reference + regLink B.image lab <|> referenceLink B.image (lab,raw) -note :: GenParser Char ParserState Inline +note :: Parser [Char] ParserState (F Inlines) note = try $ do - failIfStrict + guardEnabled Ext_footnotes ref <- noteMarker - state <- getState - let notes = stateNotes state - case lookup ref notes of - Nothing -> fail "note not found" - Just raw -> do - -- We temporarily empty the note list while parsing the note, - -- so that we don't get infinite loops with notes inside notes... - -- Note references inside other notes do not work. - updateState $ \st -> st{ stateNotes = [] } - contents <- parseFromString parseBlocks raw - updateState $ \st -> st{ stateNotes = notes } - return $ Note contents - -inlineNote :: GenParser Char ParserState Inline + return $ do + notes <- asksF stateNotes' + case lookup ref notes of + Nothing -> return $ B.str $ "[^" ++ ref ++ "]" + Just contents -> do + st <- askF + -- process the note in a context that doesn't resolve + -- notes, to avoid infinite looping with notes inside + -- notes: + let contents' = runF contents st{ stateNotes' = [] } + return $ B.note contents' + +inlineNote :: Parser [Char] ParserState (F Inlines) inlineNote = try $ do - failIfStrict + guardEnabled Ext_inline_notes char '^' - contents <- inlinesInBalancedBrackets inline - return $ Note [Para contents] + contents <- inlinesInBalancedBrackets + return $ B.note . B.para <$> contents -rawLaTeXInline' :: GenParser Char ParserState Inline +rawLaTeXInline' :: Parser [Char] ParserState (F Inlines) rawLaTeXInline' = try $ do - failIfStrict + guardEnabled Ext_raw_tex lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env RawInline _ s <- rawLaTeXInline - return $ RawInline "tex" s -- "tex" because it might be context or latex + return $ return $ B.rawInline "tex" s + -- "tex" because it might be context or latex -rawConTeXtEnvironment :: GenParser Char st String +rawConTeXtEnvironment :: Parser [Char] st String rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) @@ -1274,37 +1536,33 @@ rawConTeXtEnvironment = try $ do (try $ string "\\stop" >> string completion) return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion -inBrackets :: (GenParser Char st Char) -> GenParser Char st String +inBrackets :: (Parser [Char] st Char) -> Parser [Char] st String inBrackets parser = do char '[' contents <- many parser char ']' return $ "[" ++ contents ++ "]" -rawHtmlInline :: GenParser Char ParserState Inline +rawHtmlInline :: Parser [Char] ParserState (F Inlines) rawHtmlInline = do - st <- getState - (_,result) <- if stateStrict st - then htmlTag (not . isTextTag) - else htmlTag isInlineTag - return $ RawInline "html" result + guardEnabled Ext_raw_html + mdInHtml <- option False $ + guardEnabled Ext_markdown_in_html_blocks >> return True + (_,result) <- if mdInHtml + then htmlTag isInlineTag + else htmlTag (not . isTextTag) + return $ return $ B.rawInline "html" result -- Citations -cite :: GenParser Char ParserState Inline +cite :: Parser [Char] ParserState (F Inlines) cite = do - failIfStrict + guardEnabled Ext_citations + getOption readerCitations >>= guard . not . null citations <- textualCite <|> normalCite - return $ Cite citations [] + return $ flip B.cite mempty <$> citations -spnl :: GenParser Char st () -spnl = try $ do - skipSpaces - optional newline - skipSpaces - notFollowedBy (char '\n') - -textualCite :: GenParser Char ParserState [Citation] +textualCite :: Parser [Char] ParserState (F [Citation]) textualCite = try $ do (_, key) <- citeKey let first = Citation{ citationId = key @@ -1314,22 +1572,25 @@ textualCite = try $ do , citationNoteNum = 0 , citationHash = 0 } - rest <- option [] $ try $ spnl >> normalCite - if null rest - then option [first] $ bareloc first - else return $ first : rest + mbrest <- option Nothing $ try $ spnl >> Just <$> normalCite + case mbrest of + Just rest -> return $ (first:) <$> rest + Nothing -> option (return [first]) $ bareloc first -bareloc :: Citation -> GenParser Char ParserState [Citation] +bareloc :: Citation -> Parser [Char] ParserState (F [Citation]) bareloc c = try $ do spnl char '[' suff <- suffix - rest <- option [] $ try $ char ';' >> citeList + rest <- option (return []) $ try $ char ';' >> citeList spnl char ']' - return $ c{ citationSuffix = suff } : rest + return $ do + suff' <- suff + rest' <- rest + return $ c{ citationSuffix = B.toList suff' } : rest' -normalCite :: GenParser Char ParserState [Citation] +normalCite :: Parser [Char] ParserState (F [Citation]) normalCite = try $ do char '[' spnl @@ -1338,7 +1599,7 @@ normalCite = try $ do char ']' return citations -citeKey :: GenParser Char ParserState (Bool, String) +citeKey :: Parser [Char] ParserState (Bool, String) citeKey = try $ do suppress_author <- option False (char '-' >> return True) char '@' @@ -1346,34 +1607,37 @@ citeKey = try $ do let internal p = try $ p >>~ lookAhead (letter <|> digit) rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_?<>~") let key = first:rest - st <- getState - guard $ key `elem` stateCitations st + citations' <- getOption readerCitations + guard $ key `elem` citations' return (suppress_author, key) -suffix :: GenParser Char ParserState [Inline] +suffix :: Parser [Char] ParserState (F Inlines) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) spnl - rest <- liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline + rest <- trimInlinesF . mconcat <$> many (notFollowedBy (oneOf ";]") >> inline) return $ if hasSpace - then Space : rest + then (B.space <>) <$> rest else rest -prefix :: GenParser Char ParserState [Inline] -prefix = liftM normalizeSpaces $ +prefix :: Parser [Char] ParserState (F Inlines) +prefix = trimInlinesF . mconcat <$> manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey)) -citeList :: GenParser Char ParserState [Citation] -citeList = sepBy1 citation (try $ char ';' >> spnl) +citeList :: Parser [Char] ParserState (F [Citation]) +citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl) -citation :: GenParser Char ParserState Citation +citation :: Parser [Char] ParserState (F Citation) citation = try $ do pref <- prefix (suppress_author, key) <- citeKey suff <- suffix - return $ Citation{ citationId = key - , citationPrefix = pref - , citationSuffix = suff + return $ do + x <- pref + y <- suff + return $ Citation{ citationId = key + , citationPrefix = B.toList x + , citationSuffix = B.toList y , citationMode = if suppress_author then SuppressAuthor else NormalCitation @@ -1381,3 +1645,22 @@ citation = try $ do , citationHash = 0 } +smart :: Parser [Char] ParserState (F Inlines) +smart = do + getOption readerSmart >>= guard + doubleQuoted <|> singleQuoted <|> + choice (map (return . B.singleton <$>) [apostrophe, dash, ellipses]) + +singleQuoted :: Parser [Char] ParserState (F Inlines) +singleQuoted = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ + fmap B.singleQuoted . trimInlinesF . mconcat <$> + many1Till inline singleQuoteEnd + +doubleQuoted :: Parser [Char] ParserState (F Inlines) +doubleQuoted = try $ do + doubleQuoteStart + withQuoteContext InDoubleQuote $ + fmap B.doubleQuoted . trimInlinesF . mconcat <$> + many1Till inline doubleQuoteEnd diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 2c6fcc6e6..a0e5a0635 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Native Copyright : Copyright (C) 2011 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -31,6 +31,7 @@ Conversion of a string representation of a pandoc type (@Pandoc@, module Text.Pandoc.Readers.Native ( readNative ) where import Text.Pandoc.Definition +import Text.Pandoc.Shared (safeRead) nullMeta :: Meta nullMeta = Meta{ docTitle = [] @@ -51,31 +52,31 @@ nullMeta = Meta{ docTitle = [] readNative :: String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc readNative s = - case reads s of - (d,_):_ -> d - [] -> Pandoc nullMeta $ readBlocks s + case safeRead s of + Just d -> d + Nothing -> Pandoc nullMeta $ readBlocks s readBlocks :: String -> [Block] readBlocks s = - case reads s of - (d,_):_ -> d - [] -> [readBlock s] + case safeRead s of + Just d -> d + Nothing -> [readBlock s] readBlock :: String -> Block readBlock s = - case reads s of - (d,_):_ -> d - [] -> Plain $ readInlines s + case safeRead s of + Just d -> d + Nothing -> Plain $ readInlines s readInlines :: String -> [Inline] readInlines s = - case reads s of - (d,_):_ -> d - [] -> [readInline s] + case safeRead s of + Just d -> d + Nothing -> [readInline s] readInline :: String -> Inline readInline s = - case reads s of - (d,_):_ -> d - [] -> error "Cannot parse document" + case safeRead s of + Just d -> d + Nothing -> error "Cannot parse document" diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index d1010a736..9fb976903 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -17,9 +17,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Readers.RST + Module : Text.Pandoc.Readers.RST Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -27,24 +27,24 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion from reStructuredText to 'Pandoc' document. -} -module Text.Pandoc.Readers.RST ( +module Text.Pandoc.Readers.RST ( readRST ) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Parsing -import Text.ParserCombinators.Parsec -import Control.Monad ( when, liftM ) +import Text.Pandoc.Options +import Control.Monad ( when, liftM, guard, mzero ) import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy ) import qualified Data.Map as M import Text.Printf ( printf ) import Data.Maybe ( catMaybes ) -- | Parse reStructuredText string and return Pandoc document. -readRST :: ParserState -- ^ Parser state, including options for parser - -> String -- ^ String to parse (assuming @'\n'@ line endings) +readRST :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readRST state s = (readWith parseRST) state (s ++ "\n\n") +readRST opts s = (readWith parseRST) def{ stateOptions = opts } (s ++ "\n\n") -- -- Constants and data structure definitions @@ -58,7 +58,7 @@ underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\\`|*_<>$:[]()-.\"'\8216\8217\8220\8221" +specialChars = "\\`|*_<>$:/[]{}()-.\"'\8216\8217\8220\8221" -- -- parsing documents @@ -71,14 +71,14 @@ isHeader _ _ = False -- | Promote all headers in a list of blocks. (Part of -- title transformation for RST.) promoteHeaders :: Int -> [Block] -> [Block] -promoteHeaders num ((Header level text):rest) = +promoteHeaders num ((Header level text):rest) = (Header (level - num) text):(promoteHeaders num rest) promoteHeaders num (other:rest) = other:(promoteHeaders num rest) promoteHeaders _ [] = [] -- | If list of blocks starts with a header (or a header and subheader) -- of level that are not found elsewhere, return it as a title and --- promote all the other headers. +-- promote all the other headers. titleTransform :: [Block] -- ^ list of blocks -> ([Block], [Inline]) -- ^ modified list of blocks, title titleTransform ((Header 1 head1):(Header 2 head2):rest) | @@ -89,7 +89,7 @@ titleTransform ((Header 1 head1):rest) | (promoteHeaders 1 rest, head1) titleTransform blocks = (blocks, []) -parseRST :: GenParser Char ParserState Pandoc +parseRST :: Parser [Char] ParserState Pandoc parseRST = do optional blanklines -- skip blank lines at beginning of file startPos <- getPosition @@ -103,12 +103,13 @@ parseRST = do let reversedNotes = stateNotes st' updateState $ \s -> s { stateNotes = reverse reversedNotes } -- now parse it for real... - blocks <- parseBlocks + blocks <- parseBlocks let blocks' = filter (/= Null) blocks - state <- getState - let (blocks'', title) = if stateStandalone state + standalone <- getOption readerStandalone + let (blocks'', title) = if standalone then titleTransform blocks' else (blocks', []) + state <- getState let authors = stateAuthors state let date = stateDate state let title' = if (null title) then (stateTitle state) else title @@ -118,10 +119,10 @@ parseRST = do -- parsing blocks -- -parseBlocks :: GenParser Char ParserState [Block] +parseBlocks :: Parser [Char] ParserState [Block] parseBlocks = manyTill block eof -block :: GenParser Char ParserState Block +block :: Parser [Char] ParserState Block block = choice [ codeBlock , rawBlock , blockQuote @@ -146,7 +147,7 @@ block = choice [ codeBlock -- field list -- -rawFieldListItem :: String -> GenParser Char ParserState (String, String) +rawFieldListItem :: String -> Parser [Char] ParserState (String, String) rawFieldListItem indent = try $ do string indent char ':' @@ -160,7 +161,7 @@ rawFieldListItem indent = try $ do return (name, raw) fieldListItem :: String - -> GenParser Char ParserState (Maybe ([Inline], [[Block]])) + -> Parser [Char] ParserState (Maybe ([Inline], [[Block]])) fieldListItem indent = try $ do (name, raw) <- rawFieldListItem indent let term = [Str name] @@ -187,7 +188,7 @@ extractContents [Plain auth] = auth extractContents [Para auth] = auth extractContents _ = [] -fieldList :: GenParser Char ParserState Block +fieldList :: Parser [Char] ParserState Block fieldList = try $ do indent <- lookAhead $ many spaceChar items <- many1 $ fieldListItem indent @@ -199,7 +200,7 @@ fieldList = try $ do -- line block -- -lineBlockLine :: GenParser Char ParserState [Inline] +lineBlockLine :: Parser [Char] ParserState [Inline] lineBlockLine = try $ do char '|' char ' ' <|> lookAhead (char '\n') @@ -210,7 +211,7 @@ lineBlockLine = try $ do then normalizeSpaces line else Str white : normalizeSpaces line -lineBlock :: GenParser Char ParserState Block +lineBlock :: Parser [Char] ParserState Block lineBlock = try $ do lines' <- many1 lineBlockLine blanklines @@ -220,14 +221,14 @@ lineBlock = try $ do -- paragraph block -- -para :: GenParser Char ParserState Block +para :: Parser [Char] ParserState Block para = paraBeforeCodeBlock <|> paraNormal <?> "paragraph" -codeBlockStart :: GenParser Char st Char +codeBlockStart :: Parser [Char] st Char codeBlockStart = string "::" >> blankline >> blankline -- paragraph that ends in a :: starting a code block -paraBeforeCodeBlock :: GenParser Char ParserState Block +paraBeforeCodeBlock :: Parser [Char] ParserState Block paraBeforeCodeBlock = try $ do result <- many1 (notFollowedBy' codeBlockStart >> inline) lookAhead (string "::") @@ -236,21 +237,21 @@ paraBeforeCodeBlock = try $ do else (normalizeSpaces result) ++ [Str ":"] -- regular paragraph -paraNormal :: GenParser Char ParserState Block -paraNormal = try $ do +paraNormal :: Parser [Char] ParserState Block +paraNormal = try $ do result <- many1 inline newline blanklines return $ Para $ normalizeSpaces result -plain :: GenParser Char ParserState Block -plain = many1 inline >>= return . Plain . normalizeSpaces +plain :: Parser [Char] ParserState Block +plain = many1 inline >>= return . Plain . normalizeSpaces -- -- image block -- -imageBlock :: GenParser Char ParserState Block +imageBlock :: Parser [Char] ParserState Block imageBlock = try $ do string ".. image:: " src <- manyTill anyChar newline @@ -265,11 +266,11 @@ imageBlock = try $ do -- header blocks -- -header :: GenParser Char ParserState Block +header :: Parser [Char] ParserState Block header = doubleHeader <|> singleHeader <?> "header" -- a header with lines on top and bottom -doubleHeader :: GenParser Char ParserState Block +doubleHeader :: Parser [Char] ParserState Block doubleHeader = try $ do c <- oneOf underlineChars rest <- many (char c) -- the top line @@ -283,7 +284,7 @@ doubleHeader = try $ do blankline -- spaces and newline count lenTop (char c) -- the bottom line blanklines - -- check to see if we've had this kind of header before. + -- check to see if we've had this kind of header before. -- if so, get appropriate level. if not, add to list. state <- getState let headerTable = stateHeaderTable state @@ -294,8 +295,8 @@ doubleHeader = try $ do return $ Header level (normalizeSpaces txt) -- a header with line on the bottom only -singleHeader :: GenParser Char ParserState Block -singleHeader = try $ do +singleHeader :: Parser [Char] ParserState Block +singleHeader = try $ do notFollowedBy' whitespace txt <- many1 (do {notFollowedBy blankline; inline}) pos <- getPosition @@ -317,7 +318,7 @@ singleHeader = try $ do -- hrule block -- -hrule :: GenParser Char st Block +hrule :: Parser [Char] st Block hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -331,14 +332,14 @@ hrule = try $ do -- -- read a line indented by a given string -indentedLine :: String -> GenParser Char st [Char] +indentedLine :: String -> Parser [Char] st [Char] indentedLine indents = try $ do string indents manyTill anyChar newline -- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. -indentedBlock :: GenParser Char st [Char] +indentedBlock :: Parser [Char] st [Char] indentedBlock = try $ do indents <- lookAhead $ many1 spaceChar lns <- many1 $ try $ do b <- option "" blanklines @@ -347,7 +348,7 @@ indentedBlock = try $ do optional blanklines return $ unlines lns -codeBlock :: GenParser Char st Block +codeBlock :: Parser [Char] st Block codeBlock = try $ do codeBlockStart result <- indentedBlock @@ -355,7 +356,7 @@ codeBlock = try $ do -- | The 'code-block' directive (from Sphinx) that allows a language to be -- specified. -customCodeBlock :: GenParser Char st Block +customCodeBlock :: Parser [Char] st Block customCodeBlock = try $ do string ".. code-block:: " language <- manyTill anyChar newline @@ -364,7 +365,7 @@ customCodeBlock = try $ do return $ CodeBlock ("", ["sourceCode", language], []) $ stripTrailingNewlines result -figureBlock :: GenParser Char ParserState Block +figureBlock :: Parser [Char] ParserState Block figureBlock = try $ do string ".. figure::" src <- removeLeadingTrailingSpace `fmap` manyTill anyChar newline @@ -372,24 +373,24 @@ figureBlock = try $ do caption <- parseFromString extractCaption body return $ Para [Image caption (src,"")] -extractCaption :: GenParser Char ParserState [Inline] +extractCaption :: Parser [Char] ParserState [Inline] extractCaption = try $ do manyTill anyLine blanklines many inline -- | The 'math' directive (from Sphinx) for display math. -mathBlock :: GenParser Char st Block +mathBlock :: Parser [Char] st Block mathBlock = try $ do string ".. math::" mathBlockMultiline <|> mathBlockOneLine -mathBlockOneLine :: GenParser Char st Block +mathBlockOneLine :: Parser [Char] st Block mathBlockOneLine = try $ do result <- manyTill anyChar newline blanklines return $ Para [Math DisplayMath $ removeLeadingTrailingSpace result] -mathBlockMultiline :: GenParser Char st Block +mathBlockMultiline :: Parser [Char] st Block mathBlockMultiline = try $ do blanklines result <- indentedBlock @@ -404,9 +405,9 @@ mathBlockMultiline = try $ do $ filter (not . null) $ splitBy null lns' return $ Para $ map (Math DisplayMath) eqs -lhsCodeBlock :: GenParser Char ParserState Block +lhsCodeBlock :: Parser [Char] ParserState Block lhsCodeBlock = try $ do - failUnlessLHS + guardEnabled Ext_literate_haskell optional codeBlockStart pos <- getPosition when (sourceColumn pos /= 1) $ fail "Not in first column" @@ -418,7 +419,7 @@ lhsCodeBlock = try $ do blanklines return $ CodeBlock ("", ["sourceCode", "literate", "haskell"], []) $ intercalate "\n" lns' -birdTrackLine :: GenParser Char st [Char] +birdTrackLine :: Parser [Char] st [Char] birdTrackLine = do char '>' manyTill anyChar newline @@ -427,7 +428,7 @@ birdTrackLine = do -- raw html/latex/etc -- -rawBlock :: GenParser Char st Block +rawBlock :: Parser [Char] st Block rawBlock = try $ do string ".. raw:: " lang <- many1 (letter <|> digit) @@ -439,7 +440,7 @@ rawBlock = try $ do -- block quotes -- -blockQuote :: GenParser Char ParserState Block +blockQuote :: Parser [Char] ParserState Block blockQuote = do raw <- indentedBlock -- parse the extracted block, which may contain various block elements: @@ -450,10 +451,10 @@ blockQuote = do -- list blocks -- -list :: GenParser Char ParserState Block +list :: Parser [Char] ParserState Block list = choice [ bulletList, orderedList, definitionList ] <?> "list" -definitionListItem :: GenParser Char ParserState ([Inline], [[Block]]) +definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]]) definitionListItem = try $ do -- avoid capturing a directive or comment notFollowedBy (try $ char '.' >> char '.') @@ -463,11 +464,11 @@ definitionListItem = try $ do contents <- parseFromString parseBlocks $ raw ++ "\n" return (normalizeSpaces term, [contents]) -definitionList :: GenParser Char ParserState Block +definitionList :: Parser [Char] ParserState Block definitionList = many1 definitionListItem >>= return . DefinitionList -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: GenParser Char st Int +bulletListStart :: Parser [Char] st Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers @@ -477,14 +478,14 @@ bulletListStart = try $ do -- parses ordered list start and returns its length (inc following whitespace) orderedListStart :: ListNumberStyle -> ListNumberDelim - -> GenParser Char ParserState Int + -> Parser [Char] ParserState Int orderedListStart style delim = try $ do (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) white <- many1 spaceChar return $ markerLen + length white -- parse a line of a list item -listLine :: Int -> GenParser Char ParserState [Char] +listLine :: Int -> Parser [Char] ParserState [Char] listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength @@ -492,36 +493,35 @@ listLine markerLength = try $ do return $ line ++ "\n" -- indent by specified number of spaces (or equiv. tabs) -indentWith :: Int -> GenParser Char ParserState [Char] +indentWith :: Int -> Parser [Char] ParserState [Char] indentWith num = do - state <- getState - let tabStop = stateTabStop state + tabStop <- getOption readerTabStop if (num < tabStop) then count num (char ' ') - else choice [ try (count num (char ' ')), - (try (char '\t' >> count (num - tabStop) (char ' '))) ] + else choice [ try (count num (char ' ')), + (try (char '\t' >> count (num - tabStop) (char ' '))) ] -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: GenParser Char ParserState Int - -> GenParser Char ParserState (Int, [Char]) +rawListItem :: Parser [Char] ParserState Int + -> Parser [Char] ParserState (Int, [Char]) rawListItem start = try $ do markerLength <- start firstLine <- manyTill anyChar newline restLines <- many (listLine markerLength) return (markerLength, (firstLine ++ "\n" ++ (concat restLines))) --- continuation of a list item - indented and separated by blankline or --- (in compact lists) endline. +-- continuation of a list item - indented and separated by blankline or +-- (in compact lists) endline. -- Note: nested lists are parsed as continuations. -listContinuation :: Int -> GenParser Char ParserState [Char] +listContinuation :: Int -> Parser [Char] ParserState [Char] listContinuation markerLength = try $ do blanks <- many1 blankline result <- many1 (listLine markerLength) return $ blanks ++ concat result -listItem :: GenParser Char ParserState Int - -> GenParser Char ParserState [Block] -listItem start = try $ do +listItem :: Parser [Char] ParserState Int + -> Parser [Char] ParserState [Block] +listItem start = try $ do (markerLength, first) <- rawListItem start rest <- many (listContinuation markerLength) blanks <- choice [ try (many blankline >>~ lookAhead start), @@ -537,22 +537,22 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return parsed -orderedList :: GenParser Char ParserState Block +orderedList :: Parser [Char] ParserState Block orderedList = try $ do (start, style, delim) <- lookAhead (anyOrderedListMarker >>~ spaceChar) items <- many1 (listItem (orderedListStart style delim)) let items' = compactify items return $ OrderedList (start, style, delim) items' -bulletList :: GenParser Char ParserState Block -bulletList = many1 (listItem bulletListStart) >>= +bulletList :: Parser [Char] ParserState Block +bulletList = many1 (listItem bulletListStart) >>= return . BulletList . compactify -- -- default-role block -- -defaultRoleBlock :: GenParser Char ParserState Block +defaultRoleBlock :: Parser [Char] ParserState Block defaultRoleBlock = try $ do string ".. default-role::" -- doesn't enforce any restrictions on the role name; embedded spaces shouldn't be allowed, for one @@ -570,7 +570,7 @@ defaultRoleBlock = try $ do -- unknown directive (e.g. comment) -- -unknownDirective :: GenParser Char st Block +unknownDirective :: Parser [Char] st Block unknownDirective = try $ do string ".." notFollowedBy (noneOf " \t\n") @@ -582,7 +582,7 @@ unknownDirective = try $ do --- note block --- -noteBlock :: GenParser Char ParserState [Char] +noteBlock :: Parser [Char] ParserState [Char] noteBlock = try $ do startPos <- getPosition string ".." @@ -601,7 +601,7 @@ noteBlock = try $ do -- return blanks so line count isn't affected return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -noteMarker :: GenParser Char ParserState [Char] +noteMarker :: Parser [Char] ParserState [Char] noteMarker = do char '[' res <- many1 digit @@ -614,13 +614,13 @@ noteMarker = do -- reference key -- -quotedReferenceName :: GenParser Char ParserState [Inline] +quotedReferenceName :: Parser [Char] ParserState [Inline] quotedReferenceName = try $ do char '`' >> notFollowedBy (char '`') -- `` means inline code! - label' <- many1Till inline (char '`') + label' <- many1Till inline (char '`') return label' -unquotedReferenceName :: GenParser Char ParserState [Inline] +unquotedReferenceName :: Parser [Char] ParserState [Inline] unquotedReferenceName = try $ do label' <- many1Till inline (lookAhead $ char ':') return label' @@ -629,24 +629,24 @@ unquotedReferenceName = try $ do -- plus isolated (no two adjacent) internal hyphens, underscores, -- periods, colons and plus signs; no whitespace or other characters -- are allowed. -simpleReferenceName' :: GenParser Char st String +simpleReferenceName' :: Parser [Char] st String simpleReferenceName' = do x <- alphaNum xs <- many $ alphaNum <|> (try $ oneOf "-_:+." >> lookAhead alphaNum) return (x:xs) -simpleReferenceName :: GenParser Char st [Inline] +simpleReferenceName :: Parser [Char] st [Inline] simpleReferenceName = do raw <- simpleReferenceName' return [Str raw] -referenceName :: GenParser Char ParserState [Inline] +referenceName :: Parser [Char] ParserState [Inline] referenceName = quotedReferenceName <|> (try $ simpleReferenceName >>~ lookAhead (char ':')) <|> unquotedReferenceName -referenceKey :: GenParser Char ParserState [Char] +referenceKey :: Parser [Char] ParserState [Char] referenceKey = do startPos <- getPosition (key, target) <- choice [imageKey, anonymousKey, regularKey] @@ -658,38 +658,43 @@ referenceKey = do -- return enough blanks to replace key return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -targetURI :: GenParser Char st [Char] +targetURI :: Parser [Char] st [Char] targetURI = do skipSpaces optional newline - contents <- many1 (try (many spaceChar >> newline >> + contents <- many1 (try (many spaceChar >> newline >> many1 spaceChar >> noneOf " \t\n") <|> noneOf "\n") blanklines return $ escapeURI $ removeLeadingTrailingSpace $ contents -imageKey :: GenParser Char ParserState (Key, Target) +imageKey :: Parser [Char] ParserState (Key, Target) imageKey = try $ do string ".. |" - ref <- manyTill inline (char '|') + (_,ref) <- withRaw (manyTill inline (char '|')) skipSpaces string "image::" src <- targetURI - return (toKey (normalizeSpaces ref), (src, "")) + return (toKey $ init ref, (src, "")) -anonymousKey :: GenParser Char st (Key, Target) +anonymousKey :: Parser [Char] st (Key, Target) anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI pos <- getPosition - return (toKey [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, "")) + return (toKey $ "_" ++ printf "%09d" (sourceLine pos), (src, "")) -regularKey :: GenParser Char ParserState (Key, Target) +stripTicks :: String -> String +stripTicks = reverse . stripTick . reverse . stripTick + where stripTick ('`':xs) = xs + stripTick xs = xs + +regularKey :: Parser [Char] ParserState (Key, Target) regularKey = try $ do string ".. _" - ref <- referenceName + (_,ref) <- withRaw referenceName char ':' src <- targetURI - return (toKey (normalizeSpaces ref), (src, "")) + return (toKey $ stripTicks ref, (src, "")) -- -- tables @@ -702,37 +707,37 @@ regularKey = try $ do -- Simple tables TODO: -- - column spans -- - multiline support --- - ensure that rightmost column span does not need to reach end +-- - ensure that rightmost column span does not need to reach end -- - require at least 2 columns -- -- Grid tables TODO: -- - column spans -dashedLine :: Char -> GenParser Char st (Int, Int) +dashedLine :: Char -> Parser [Char] st (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') return (length dashes, length $ dashes ++ sp) -simpleDashedLines :: Char -> GenParser Char st [(Int,Int)] +simpleDashedLines :: Char -> Parser [Char] st [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -- Parse a table row separator -simpleTableSep :: Char -> GenParser Char ParserState Char +simpleTableSep :: Char -> Parser [Char] ParserState Char simpleTableSep ch = try $ simpleDashedLines ch >> newline -- Parse a table footer -simpleTableFooter :: GenParser Char ParserState [Char] +simpleTableFooter :: Parser [Char] ParserState [Char] simpleTableFooter = try $ simpleTableSep '=' >> blanklines -- Parse a raw line and split it into chunks by indices. -simpleTableRawLine :: [Int] -> GenParser Char ParserState [String] +simpleTableRawLine :: [Int] -> Parser [Char] ParserState [String] simpleTableRawLine indices = do line <- many1Till anyChar newline return (simpleTableSplitLine indices line) -- Parse a table row and return a list of blocks (columns). -simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]] +simpleTableRow :: [Int] -> Parser [Char] ParserState [[Block]] simpleTableRow indices = do notFollowedBy' simpleTableFooter firstLine <- simpleTableRawLine indices @@ -745,8 +750,8 @@ simpleTableSplitLine indices line = map removeLeadingTrailingSpace $ tail $ splitByIndices (init indices) line -simpleTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) +simpleTableHeader :: Bool -- ^ Headerless table + -> Parser [Char] ParserState ([[Block]], [Alignment], [Int]) simpleTableHeader headless = try $ do optional blanklines rawContent <- if headless @@ -766,28 +771,28 @@ simpleTableHeader headless = try $ do -- Parse a simple table. simpleTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block + -> Parser [Char] ParserState Block simpleTable headless = do - Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return []) + Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter -- Simple tables get 0s for relative column widths (i.e., use default) return $ Table c a (replicate (length a) 0) h l where sep = return () -- optional (simpleTableSep '-') gridTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block -gridTable = gridTableWith block (return []) + -> Parser [Char] ParserState Block +gridTable = gridTableWith parseBlocks -table :: GenParser Char ParserState Block +table :: Parser [Char] ParserState Block table = gridTable False <|> simpleTable False <|> gridTable True <|> simpleTable True <?> "table" - -- + -- -- inline -- -inline :: GenParser Char ParserState Inline +inline :: Parser [Char] ParserState Inline inline = choice [ whitespace , link , str @@ -805,44 +810,53 @@ inline = choice [ whitespace , escapedChar , symbol ] <?> "inline" -hyphens :: GenParser Char ParserState Inline +hyphens :: Parser [Char] ParserState Inline hyphens = do result <- many1 (char '-') - option Space endline + option Space endline -- don't want to treat endline after hyphen or dash as a space return $ Str result -escapedChar :: GenParser Char st Inline +escapedChar :: Parser [Char] st Inline escapedChar = do c <- escaped anyChar return $ if c == ' ' -- '\ ' is null in RST then Str "" else Str [c] -symbol :: GenParser Char ParserState Inline -symbol = do +symbol :: Parser [Char] ParserState Inline +symbol = do result <- oneOf specialChars return $ Str [result] -- parses inline code, between codeStart and codeEnd -code :: GenParser Char ParserState Inline -code = try $ do +code :: Parser [Char] ParserState Inline +code = try $ do string "``" result <- manyTill anyChar (try (string "``")) return $ Code nullAttr $ removeLeadingTrailingSpace $ intercalate " " $ lines result -emph :: GenParser Char ParserState Inline -emph = enclosed (char '*') (char '*') inline >>= +-- succeeds only if we're not right after a str (ie. in middle of word) +atStart :: Parser [Char] ParserState a -> Parser [Char] ParserState a +atStart p = do + pos <- getPosition + st <- getState + -- single quote start can't be right after str + guard $ stateLastStrPos st /= Just pos + p + +emph :: Parser [Char] ParserState Inline +emph = enclosed (atStart $ char '*') (char '*') inline >>= return . Emph . normalizeSpaces -strong :: GenParser Char ParserState Inline -strong = enclosed (string "**") (try $ string "**") inline >>= +strong :: Parser [Char] ParserState Inline +strong = enclosed (atStart $ string "**") (try $ string "**") inline >>= return . Strong . normalizeSpaces -- Parses inline interpreted text which is required to have the given role. -- This decision is based on the role marker (if present), -- and the current default interpreted text role. -interpreted :: [Char] -> GenParser Char ParserState [Char] +interpreted :: [Char] -> Parser [Char] ParserState [Char] interpreted role = try $ do state <- getState if role == stateRstDefaultRole state @@ -856,30 +870,30 @@ interpreted role = try $ do -- http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules -- but it should be good enough for most purposes unmarkedInterpretedText = do - result <- enclosed (char '`') (char '`') anyChar + result <- enclosed (atStart $ char '`') (char '`') anyChar return result -superscript :: GenParser Char ParserState Inline +superscript :: Parser [Char] ParserState Inline superscript = interpreted "sup" >>= \x -> return (Superscript [Str x]) -subscript :: GenParser Char ParserState Inline +subscript :: Parser [Char] ParserState Inline subscript = interpreted "sub" >>= \x -> return (Subscript [Str x]) -math :: GenParser Char ParserState Inline +math :: Parser [Char] ParserState Inline math = interpreted "math" >>= \x -> return (Math InlineMath x) -whitespace :: GenParser Char ParserState Inline +whitespace :: Parser [Char] ParserState Inline whitespace = many1 spaceChar >> return Space <?> "whitespace" -str :: GenParser Char ParserState Inline +str :: Parser [Char] ParserState Inline str = do - result <- many1 (noneOf (specialChars ++ "\t\n ")) - pos <- getPosition - updateState $ \s -> s{ stateLastStrPos = Just pos } + let strChar = noneOf ("\t\n " ++ specialChars) + result <- many1 strChar + updateLastStrPos return $ Str result -- an endline character that can be treated as a space, not a structural break -endline :: GenParser Char ParserState Inline +endline :: Parser [Char] ParserState Inline endline = try $ do newline notFollowedBy blankline @@ -895,14 +909,14 @@ endline = try $ do -- links -- -link :: GenParser Char ParserState Inline +link :: Parser [Char] ParserState Inline link = choice [explicitLink, referenceLink, autoLink] <?> "link" -explicitLink :: GenParser Char ParserState Inline +explicitLink :: Parser [Char] ParserState Inline explicitLink = try $ do char '`' notFollowedBy (char '`') -- `` marks start of inline code - label' <- manyTill (notFollowedBy (char '`') >> inline) + label' <- manyTill (notFollowedBy (char '`') >> inline) (try (spaces >> char '<')) src <- manyTill (noneOf ">\n") (char '>') skipSpaces @@ -910,53 +924,53 @@ explicitLink = try $ do return $ Link (normalizeSpaces label') (escapeURI $ removeLeadingTrailingSpace src, "") -referenceLink :: GenParser Char ParserState Inline +referenceLink :: Parser [Char] ParserState Inline referenceLink = try $ do - label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_' + (label',ref) <- withRaw (quotedReferenceName <|> simpleReferenceName) >>~ + char '_' state <- getState let keyTable = stateKeys state - let isAnonKey x = case fromKey x of - [Str ('_':_)] -> True - _ -> False - key <- option (toKey label') $ + let isAnonKey (Key ('_':_)) = True + isAnonKey _ = False + key <- option (toKey $ stripTicks ref) $ do char '_' let anonKeys = sort $ filter isAnonKey $ M.keys keyTable if null anonKeys - then pzero + then mzero else return (head anonKeys) - (src,tit) <- case lookupKeySrc keyTable key of + (src,tit) <- case M.lookup key keyTable of Nothing -> fail "no corresponding key" Just target -> return target -- if anonymous link, remove key so it won't be used again when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } - return $ Link (normalizeSpaces label') (src, tit) + return $ Link (normalizeSpaces label') (src, tit) -autoURI :: GenParser Char ParserState Inline +autoURI :: Parser [Char] ParserState Inline autoURI = do (orig, src) <- uri return $ Link [Str orig] (src, "") -autoEmail :: GenParser Char ParserState Inline +autoEmail :: Parser [Char] ParserState Inline autoEmail = do (orig, src) <- emailAddress return $ Link [Str orig] (src, "") -autoLink :: GenParser Char ParserState Inline +autoLink :: Parser [Char] ParserState Inline autoLink = autoURI <|> autoEmail -- For now, we assume that all substitution references are for images. -image :: GenParser Char ParserState Inline +image :: Parser [Char] ParserState Inline image = try $ do char '|' - ref <- manyTill inline (char '|') + (alt,ref) <- withRaw (manyTill inline (char '|')) state <- getState let keyTable = stateKeys state - (src,tit) <- case lookupKeySrc keyTable (toKey ref) of + (src,tit) <- case M.lookup (toKey $ init ref) keyTable of Nothing -> fail "no corresponding key" Just target -> return target - return $ Image (normalizeSpaces ref) (src, tit) + return $ Image (normalizeSpaces alt) (src, tit) -note :: GenParser Char ParserState Inline +note :: Parser [Char] ParserState Inline note = try $ do ref <- noteMarker char '_' diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index 67dfe6753..fe49a992e 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.TeXMath Copyright : Copyright (C) 2007-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 348900d38..89f281ae8 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Textile Copyright : Copyright (C) 2010-2012 Paul Rivier and John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : Paul Rivier <paul*rivier#demotera*com> Stability : alpha @@ -56,29 +56,34 @@ TODO : refactor common patterns across readers : module Text.Pandoc.Readers.Textile ( readTextile) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag ) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) -import Text.ParserCombinators.Parsec import Text.HTML.TagSoup.Match import Data.Char ( digitToInt, isUpper ) import Control.Monad ( guard, liftM ) import Control.Applicative ((<$>), (*>), (<*)) -- | Parse a Textile text and return a Pandoc document. -readTextile :: ParserState -- ^ Parser state, including options for parser - -> String -- ^ String to parse (assuming @'\n'@ line endings) - -> Pandoc -readTextile state s = - (readWith parseTextile) state{ stateOldDashes = True } (s ++ "\n\n") +readTextile :: ReaderOptions -- ^ Reader options + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Pandoc +readTextile opts s = + (readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n") -- | Generate a Pandoc ADT from a textile document -parseTextile :: GenParser Char ParserState Pandoc +parseTextile :: Parser [Char] ParserState Pandoc parseTextile = do -- textile allows raw HTML and does smart punctuation by default - updateState (\state -> state { stateParseRaw = True, stateSmart = True }) + oldOpts <- stateOptions `fmap` getState + updateState $ \state -> state{ stateOptions = + oldOpts{ readerSmart = True + , readerParseRaw = True + , readerOldDashes = True + } } many blankline startPos <- getPosition -- go through once just to get list of reference keys and notes @@ -93,10 +98,10 @@ parseTextile = do blocks <- parseBlocks return $ Pandoc (Meta [] [] []) blocks -- FIXME -noteMarker :: GenParser Char ParserState [Char] +noteMarker :: Parser [Char] ParserState [Char] noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') -noteBlock :: GenParser Char ParserState [Char] +noteBlock :: Parser [Char] ParserState [Char] noteBlock = try $ do startPos <- getPosition ref <- noteMarker @@ -111,11 +116,11 @@ noteBlock = try $ do return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -- | Parse document blocks -parseBlocks :: GenParser Char ParserState [Block] +parseBlocks :: Parser [Char] ParserState [Block] parseBlocks = manyTill block eof -- | Block parsers list tried in definition order -blockParsers :: [GenParser Char ParserState Block] +blockParsers :: [Parser [Char] ParserState Block] blockParsers = [ codeBlock , header , blockQuote @@ -128,20 +133,20 @@ blockParsers = [ codeBlock , nullBlock ] -- | Any block in the order of definition of blockParsers -block :: GenParser Char ParserState Block +block :: Parser [Char] ParserState Block block = choice blockParsers <?> "block" -codeBlock :: GenParser Char ParserState Block +codeBlock :: Parser [Char] ParserState Block codeBlock = codeBlockBc <|> codeBlockPre -codeBlockBc :: GenParser Char ParserState Block +codeBlockBc :: Parser [Char] ParserState Block codeBlockBc = try $ do string "bc. " contents <- manyTill anyLine blanklines return $ CodeBlock ("",[],[]) $ unlines contents -- | Code Blocks in Textile are between <pre> and </pre> -codeBlockPre :: GenParser Char ParserState Block +codeBlockPre :: Parser [Char] ParserState Block codeBlockPre = try $ do htmlTag (tagOpen (=="pre") null) result' <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak) @@ -156,7 +161,7 @@ codeBlockPre = try $ do return $ CodeBlock ("",[],[]) result''' -- | Header of the form "hN. content" with N in 1..6 -header :: GenParser Char ParserState Block +header :: Parser [Char] ParserState Block header = try $ do char 'h' level <- digitToInt <$> oneOf "123456" @@ -165,14 +170,14 @@ header = try $ do return $ Header level name -- | Blockquote of the form "bq. content" -blockQuote :: GenParser Char ParserState Block +blockQuote :: Parser [Char] ParserState Block blockQuote = try $ do string "bq" >> optional attributes >> char '.' >> whitespace BlockQuote . singleton <$> para -- Horizontal rule -hrule :: GenParser Char st Block +hrule :: Parser [Char] st Block hrule = try $ do skipSpaces start <- oneOf "-*" @@ -187,39 +192,39 @@ hrule = try $ do -- | Can be a bullet list or an ordered list. This implementation is -- strict in the nesting, sublist must start at exactly "parent depth -- plus one" -anyList :: GenParser Char ParserState Block +anyList :: Parser [Char] ParserState Block anyList = try $ ( (anyListAtDepth 1) <* blanklines ) -- | This allow one type of list to be nested into an other type, -- provided correct nesting -anyListAtDepth :: Int -> GenParser Char ParserState Block +anyListAtDepth :: Int -> Parser [Char] ParserState Block anyListAtDepth depth = choice [ bulletListAtDepth depth, orderedListAtDepth depth, definitionList ] -- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: Int -> GenParser Char ParserState Block +bulletListAtDepth :: Int -> Parser [Char] ParserState Block bulletListAtDepth depth = try $ BulletList <$> many1 (bulletListItemAtDepth depth) -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: Int -> GenParser Char ParserState [Block] +bulletListItemAtDepth :: Int -> Parser [Char] ParserState [Block] bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of -- leading '#' -orderedListAtDepth :: Int -> GenParser Char ParserState Block +orderedListAtDepth :: Int -> Parser [Char] ParserState Block orderedListAtDepth depth = try $ do items <- many1 (orderedListItemAtDepth depth) return (OrderedList (1, DefaultStyle, DefaultDelim) items) -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: Int -> GenParser Char ParserState [Block] +orderedListItemAtDepth :: Int -> Parser [Char] ParserState [Block] orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items -genericListItemAtDepth :: Char -> Int -> GenParser Char ParserState [Block] +genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState [Block] genericListItemAtDepth c depth = try $ do count depth (char c) >> optional attributes >> whitespace p <- inlines @@ -227,22 +232,22 @@ genericListItemAtDepth c depth = try $ do return ((Plain p):sublist) -- | A definition list is a set of consecutive definition items -definitionList :: GenParser Char ParserState Block +definitionList :: Parser [Char] ParserState Block definitionList = try $ DefinitionList <$> many1 definitionListItem - + -- | A definition list item in textile begins with '- ', followed by -- the term defined, then spaces and ":=". The definition follows, on -- the same single line, or spaned on multiple line, after a line -- break. -definitionListItem :: GenParser Char ParserState ([Inline], [[Block]]) +definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]]) definitionListItem = try $ do string "- " term <- many1Till inline (try (whitespace >> string ":=")) - def <- inlineDef <|> multilineDef - return (term, def) - where inlineDef :: GenParser Char ParserState [[Block]] + def' <- inlineDef <|> multilineDef + return (term, def') + where inlineDef :: Parser [Char] ParserState [[Block]] inlineDef = liftM (\d -> [[Plain d]]) $ try (whitespace >> inlines) - multilineDef :: GenParser Char ParserState [[Block]] + multilineDef :: Parser [Char] ParserState [[Block]] multilineDef = try $ do optional whitespace >> newline s <- many1Till anyChar (try (string "=:" >> newline)) @@ -252,76 +257,76 @@ definitionListItem = try $ do -- | This terminates a block such as a paragraph. Because of raw html -- blocks support, we have to lookAhead for a rawHtmlBlock. -blockBreak :: GenParser Char ParserState () +blockBreak :: Parser [Char] ParserState () blockBreak = try (newline >> blanklines >> return ()) <|> (lookAhead rawHtmlBlock >> return ()) -- raw content -- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: GenParser Char ParserState Block +rawHtmlBlock :: Parser [Char] ParserState Block rawHtmlBlock = try $ do (_,b) <- htmlTag isBlockTag optional blanklines return $ RawBlock "html" b -- | Raw block of LaTeX content -rawLaTeXBlock' :: GenParser Char ParserState Block +rawLaTeXBlock' :: Parser [Char] ParserState Block rawLaTeXBlock' = do - failIfStrict + guardEnabled Ext_raw_tex RawBlock "latex" <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. -para :: GenParser Char ParserState Block +para :: Parser [Char] ParserState Block para = try $ Para . normalizeSpaces <$> manyTill inline blockBreak -- Tables - + -- | A table cell spans until a pipe | -tableCell :: GenParser Char ParserState TableCell +tableCell :: Parser [Char] ParserState TableCell tableCell = do c <- many1 (noneOf "|\n") content <- parseFromString (many1 inline) c return $ [ Plain $ normalizeSpaces content ] -- | A table row is made of many table cells -tableRow :: GenParser Char ParserState [TableCell] +tableRow :: Parser [Char] ParserState [TableCell] tableRow = try $ ( char '|' *> (endBy1 tableCell (char '|')) <* newline) -- | Many table rows -tableRows :: GenParser Char ParserState [[TableCell]] +tableRows :: Parser [Char] ParserState [[TableCell]] tableRows = many1 tableRow -- | Table headers are made of cells separated by a tag "|_." -tableHeaders :: GenParser Char ParserState [TableCell] +tableHeaders :: Parser [Char] ParserState [TableCell] tableHeaders = let separator = (try $ string "|_.") in try $ ( separator *> (sepBy1 tableCell separator) <* char '|' <* newline ) - + -- | A table with an optional header. Current implementation can -- handle tables with and without header, but will parse cells -- alignment attributes as content. -table :: GenParser Char ParserState Block +table :: Parser [Char] ParserState Block table = try $ do headers <- option [] tableHeaders rows <- tableRows blanklines let nbOfCols = max (length headers) (length $ head rows) - return $ Table [] + return $ Table [] (replicate nbOfCols AlignDefault) (replicate nbOfCols 0.0) headers rows - + -- | Blocks like 'p' and 'table' do not need explicit block tag. -- However, they can be used to set HTML/CSS attributes when needed. maybeExplicitBlock :: String -- ^ block tag name - -> GenParser Char ParserState Block -- ^ implicit block - -> GenParser Char ParserState Block + -> Parser [Char] ParserState Block -- ^ implicit block + -> Parser [Char] ParserState Block maybeExplicitBlock name blk = try $ do - optional $ try $ string name >> optional attributes >> char '.' >> + optional $ try $ string name >> optional attributes >> char '.' >> ((try whitespace) <|> endline) blk @@ -333,15 +338,15 @@ maybeExplicitBlock name blk = try $ do -- | Any inline element -inline :: GenParser Char ParserState Inline +inline :: Parser [Char] ParserState Inline inline = choice inlineParsers <?> "inline" -- | List of consecutive inlines before a newline -inlines :: GenParser Char ParserState [Inline] +inlines :: Parser [Char] ParserState [Inline] inlines = manyTill inline newline -- | Inline parsers tried in order -inlineParsers :: [GenParser Char ParserState Inline] +inlineParsers :: [Parser [Char] ParserState Inline] inlineParsers = [ autoLink , str , whitespace @@ -362,7 +367,7 @@ inlineParsers = [ autoLink ] -- | Inline markups -inlineMarkup :: GenParser Char ParserState Inline +inlineMarkup :: Parser [Char] ParserState Inline inlineMarkup = choice [ simpleInline (string "??") (Cite []) , simpleInline (string "**") Strong , simpleInline (string "__") Emph @@ -375,29 +380,29 @@ inlineMarkup = choice [ simpleInline (string "??") (Cite []) ] -- | Trademark, registered, copyright -mark :: GenParser Char st Inline +mark :: Parser [Char] st Inline mark = try $ char '(' >> (try tm <|> try reg <|> copy) -reg :: GenParser Char st Inline +reg :: Parser [Char] st Inline reg = do oneOf "Rr" char ')' return $ Str "\174" -tm :: GenParser Char st Inline +tm :: Parser [Char] st Inline tm = do oneOf "Tt" oneOf "Mm" char ')' return $ Str "\8482" -copy :: GenParser Char st Inline +copy :: Parser [Char] st Inline copy = do oneOf "Cc" char ')' return $ Str "\169" -note :: GenParser Char ParserState Inline +note :: Parser [Char] ParserState Inline note = try $ do ref <- (char '[' *> many1 digit <* char ']') notes <- stateNotes <$> getState @@ -405,7 +410,7 @@ note = try $ do Nothing -> fail "note not found" Just raw -> liftM Note $ parseFromString parseBlocks raw --- | Special chars +-- | Special chars markupChars :: [Char] markupChars = "\\[]*#_@~-+^|%=" @@ -421,17 +426,17 @@ wordBoundaries :: [Char] wordBoundaries = markupChars ++ stringBreakers -- | Parse a hyphened sequence of words -hyphenedWords :: GenParser Char ParserState String +hyphenedWords :: Parser [Char] ParserState String hyphenedWords = try $ do hd <- noneOf wordBoundaries - tl <- many ( (noneOf wordBoundaries) <|> + tl <- many ( (noneOf wordBoundaries) <|> try (oneOf markupChars <* lookAhead (noneOf wordBoundaries) ) ) let wd = hd:tl - option wd $ try $ + option wd $ try $ (\r -> concat [wd, "-", r]) <$> (char '-' *> hyphenedWords) -- | Any string -str :: GenParser Char ParserState Inline +str :: Parser [Char] ParserState Inline str = do baseStr <- hyphenedWords -- RedCloth compliance : if parsed word is uppercase and immediatly @@ -444,44 +449,57 @@ str = do return $ Str fullStr -- | Textile allows HTML span infos, we discard them -htmlSpan :: GenParser Char ParserState Inline +htmlSpan :: Parser [Char] ParserState Inline htmlSpan = try $ Str <$> ( char '%' *> attributes *> manyTill anyChar (char '%') ) -- | Some number of space chars -whitespace :: GenParser Char ParserState Inline +whitespace :: Parser [Char] ParserState Inline whitespace = many1 spaceChar >> return Space <?> "whitespace" -- | In Textile, an isolated endline character is a line break -endline :: GenParser Char ParserState Inline +endline :: Parser [Char] ParserState Inline endline = try $ do newline >> notFollowedBy blankline return LineBreak -rawHtmlInline :: GenParser Char ParserState Inline +rawHtmlInline :: Parser [Char] ParserState Inline rawHtmlInline = RawInline "html" . snd <$> htmlTag isInlineTag - --- | Raw LaTeX Inline -rawLaTeXInline' :: GenParser Char ParserState Inline + +-- | Raw LaTeX Inline +rawLaTeXInline' :: Parser [Char] ParserState Inline rawLaTeXInline' = try $ do - failIfStrict + guardEnabled Ext_raw_tex rawLaTeXInline --- | Textile standard link syntax is "label":target -link :: GenParser Char ParserState Inline -link = try $ do +-- | Textile standard link syntax is "label":target. But we +-- can also have ["label":target]. +link :: Parser [Char] ParserState Inline +link = linkB <|> linkNoB + +linkNoB :: Parser [Char] ParserState Inline +linkNoB = try $ do + name <- surrounded (char '"') inline + char ':' + let stopChars = "!.,;:" + url <- manyTill nonspaceChar (lookAhead $ space <|> try (oneOf stopChars >> (space <|> newline))) + return $ Link name (url, "") + +linkB :: Parser [Char] ParserState Inline +linkB = try $ do + char '[' name <- surrounded (char '"') inline char ':' - url <- manyTill (anyChar) (lookAhead $ (space <|> try (oneOf ".;,:" >> (space <|> newline)))) + url <- manyTill nonspaceChar (char ']') return $ Link name (url, "") -- | Detect plain links to http or email. -autoLink :: GenParser Char ParserState Inline +autoLink :: Parser [Char] ParserState Inline autoLink = do (orig, src) <- (try uri <|> try emailAddress) return $ Link [Str orig] (src, "") -- | image embedding -image :: GenParser Char ParserState Inline +image :: Parser [Char] ParserState Inline image = try $ do char '!' >> notFollowedBy space src <- manyTill anyChar (lookAhead $ oneOf "!(") @@ -489,49 +507,49 @@ image = try $ do char '!' return $ Image [Str alt] (src, alt) -escapedInline :: GenParser Char ParserState Inline +escapedInline :: Parser [Char] ParserState Inline escapedInline = escapedEqs <|> escapedTag -escapedEqs :: GenParser Char ParserState Inline +escapedEqs :: Parser [Char] ParserState Inline escapedEqs = Str <$> (try $ string "==" *> manyTill anyChar (try $ string "==")) -- | literal text escaped btw <notextile> tags -escapedTag :: GenParser Char ParserState Inline +escapedTag :: Parser [Char] ParserState Inline escapedTag = Str <$> (try $ string "<notextile>" *> manyTill anyChar (try $ string "</notextile>")) -- | Any special symbol defined in wordBoundaries -symbol :: GenParser Char ParserState Inline +symbol :: Parser [Char] ParserState Inline symbol = Str . singleton <$> oneOf wordBoundaries -- | Inline code -code :: GenParser Char ParserState Inline +code :: Parser [Char] ParserState Inline code = code1 <|> code2 -code1 :: GenParser Char ParserState Inline +code1 :: Parser [Char] ParserState Inline code1 = Code nullAttr <$> surrounded (char '@') anyChar -code2 :: GenParser Char ParserState Inline +code2 :: Parser [Char] ParserState Inline code2 = do htmlTag (tagOpen (=="tt") null) Code nullAttr <$> manyTill anyChar (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes -attributes :: GenParser Char ParserState String +attributes :: Parser [Char] ParserState String attributes = choice [ enclosed (char '(') (char ')') anyChar, enclosed (char '{') (char '}') anyChar, enclosed (char '[') (char ']') anyChar] -- | Parses material surrounded by a parser. -surrounded :: GenParser Char st t -- ^ surrounding parser - -> GenParser Char st a -- ^ content parser (to be used repeatedly) - -> GenParser Char st [a] +surrounded :: Parser [Char] st t -- ^ surrounding parser + -> Parser [Char] st a -- ^ content parser (to be used repeatedly) + -> Parser [Char] st [a] surrounded border = enclosed border (try border) -- | Inlines are most of the time of the same form -simpleInline :: GenParser Char ParserState t -- ^ surrounding parser +simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser -> ([Inline] -> Inline) -- ^ Inline constructor - -> GenParser Char ParserState Inline -- ^ content parser (to be used repeatedly) + -> Parser [Char] ParserState Inline -- ^ content parser (to be used repeatedly) simpleInline border construct = surrounded border (inlineWithAttribute) >>= return . construct . normalizeSpaces where inlineWithAttribute = (try $ optional attributes) >> inline diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index a80ab0c63..7a21f6f3a 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -42,7 +42,7 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>)) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (findDataFile) +import Text.Pandoc.Shared (findDataFile, renderTags') import Text.Pandoc.MIME (getMimeType) import System.Directory (doesFileExist) @@ -102,14 +102,14 @@ convertTag userdata t@(TagOpen "script" as) = src -> do (raw, mime) <- getRaw userdata (fromAttrib "type" t) src let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) - return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) + return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) convertTag userdata t@(TagOpen "link" as) = case fromAttrib "href" t of [] -> return t src -> do (raw, mime) <- getRaw userdata (fromAttrib "type" t) src let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) - return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) + return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) convertTag _ t = return t cssURLs :: Maybe FilePath -> FilePath -> ByteString -> IO ByteString @@ -163,14 +163,3 @@ makeSelfContained userdata inp = do out' <- mapM (convertTag userdata) tags return $ renderTags' out' --- repeated from HTML reader: -renderTags' :: [Tag String] -> String -renderTags' = renderTagsOptions - renderOptions{ optMinimize = \x -> - let y = map toLower x - in y == "hr" || y == "br" || - y == "img" || y == "meta" || - y == "link" - , optRawTag = \x -> - let y = map toLower x - in y == "script" || y == "style" } diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index f14a57c1f..d86f9a390 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Shared Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -59,13 +59,8 @@ module Text.Pandoc.Shared ( uniqueIdent, isHeaderBlock, headerShift, - -- * Writer options - HTMLMathMethod (..), - CiteMethod (..), - ObfuscationMethod (..), - HTMLSlideVariant (..), - WriterOptions (..), - defaultWriterOptions, + -- * TagSoup HTML handling + renderTags', -- * File handling inDirectory, findDataFile, @@ -73,6 +68,8 @@ module Text.Pandoc.Shared ( -- * Error handling err, warn, + -- * Safe read + safeRead ) where import Text.Pandoc.Definition @@ -90,11 +87,12 @@ import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S import Control.Monad (msum) import Paths_pandoc (getDataFileName) -import Text.Pandoc.Highlighting (Style, pygments) import Text.Pandoc.Pretty (charWidth) import System.Locale (defaultTimeLocale) import Data.Time import System.IO (stderr) +import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), + renderOptions) -- -- List processing @@ -149,7 +147,7 @@ backslashEscapes = map (\ch -> (ch, ['\\',ch])) -- characters and strings. escapeStringUsing :: [(Char, String)] -> String -> String escapeStringUsing _ [] = "" -escapeStringUsing escapeTable (x:xs) = +escapeStringUsing escapeTable (x:xs) = case (lookup x escapeTable) of Just str -> str ++ rest Nothing -> x:rest @@ -176,7 +174,7 @@ stripFirstAndLast :: String -> String stripFirstAndLast str = drop 1 $ take ((length str) - 1) str --- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). +-- | Change CamelCase word to hyphenated lowercase (e.g., camel-case). camelCaseToHyphenated :: String -> String camelCaseToHyphenated [] = "" camelCaseToHyphenated (a:b:rest) | isLower a && isUpper b = @@ -247,13 +245,13 @@ normalizeDate s = fmap (formatTime defaultTimeLocale "%F") -- | Generate infinite lazy list of markers for an ordered list, -- depending on list attributes. orderedListMarkers :: (Int, ListNumberStyle, ListNumberDelim) -> [String] -orderedListMarkers (start, numstyle, numdelim) = +orderedListMarkers (start, numstyle, numdelim) = let singleton c = [c] nums = case numstyle of DefaultStyle -> map show [start..] Example -> map show [start..] Decimal -> map show [start..] - UpperAlpha -> drop (start - 1) $ cycle $ + UpperAlpha -> drop (start - 1) $ cycle $ map singleton ['A'..'Z'] LowerAlpha -> drop (start - 1) $ cycle $ map singleton ['a'..'z'] @@ -271,13 +269,12 @@ orderedListMarkers (start, numstyle, numdelim) = -- remove empty Str elements. normalizeSpaces :: [Inline] -> [Inline] normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty - where cleanup [] = [] - cleanup (Space:rest) = let rest' = dropWhile isSpaceOrEmpty rest - in case rest' of - [] -> [] - _ -> Space : cleanup rest' + where cleanup [] = [] + cleanup (Space:rest) = case dropWhile isSpaceOrEmpty rest of + [] -> [] + (x:xs) -> Space : x : cleanup xs cleanup ((Str ""):rest) = cleanup rest - cleanup (x:rest) = x : cleanup rest + cleanup (x:rest) = x : cleanup rest isSpaceOrEmpty :: Inline -> Bool isSpaceOrEmpty Space = True @@ -386,7 +383,7 @@ isPara (Para _) = True isPara _ = False -- | Data structure for defining hierarchical Pandoc documents -data Element = Blk Block +data Element = Blk Block | Sec Int [Int] String [Inline] [Element] -- lvl num ident label contents deriving (Eq, Read, Show, Typeable, Data) @@ -414,7 +411,7 @@ hierarchicalizeWithIds ((Header level title'):xs) = do let ident = uniqueIdent title' usedIdents let lastnum' = take level lastnum let newnum = if length lastnum' >= level - then init lastnum' ++ [last lastnum' + 1] + then init lastnum' ++ [last lastnum' + 1] else lastnum ++ replicate (level - length lastnum - 1) 0 ++ [1] S.put (newnum, (ident : usedIdents)) let (sectionContents, rest) = break (headerLtEq level) xs @@ -456,112 +453,20 @@ headerShift n = bottomUp shift shift x = x -- --- Writer options +-- TagSoup HTML handling -- -data HTMLMathMethod = PlainMath - | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js - | JsMath (Maybe String) -- url of jsMath load script - | GladTeX - | WebTeX String -- url of TeX->image script. - | MathML (Maybe String) -- url of MathMLinHTML.js - | MathJax String -- url of MathJax.js - deriving (Show, Read, Eq) - -data CiteMethod = Citeproc -- use citeproc to render them - | Natbib -- output natbib cite commands - | Biblatex -- output biblatex cite commands - deriving (Show, Read, Eq) - --- | Methods for obfuscating email addresses in HTML. -data ObfuscationMethod = NoObfuscation - | ReferenceObfuscation - | JavascriptObfuscation - deriving (Show, Read, Eq) - --- | Varieties of HTML slide shows. -data HTMLSlideVariant = S5Slides - | SlidySlides - | SlideousSlides - | DZSlides - | NoSlides - deriving (Show, Read, Eq) - --- | Options for writers -data WriterOptions = WriterOptions - { writerStandalone :: Bool -- ^ Include header and footer - , writerTemplate :: String -- ^ Template to use in standalone mode - , writerVariables :: [(String, String)] -- ^ Variables to set in template - , writerEPUBMetadata :: String -- ^ Metadata to include in EPUB - , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs - , writerTableOfContents :: Bool -- ^ Include table of contents - , writerSlideVariant :: HTMLSlideVariant -- ^ Are we writing S5, Slidy or Slideous? - , writerIncremental :: Bool -- ^ True if lists should be incremental - , writerXeTeX :: Bool -- ^ Create latex suitable for use by xetex - , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML - , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) - , writerNumberSections :: Bool -- ^ Number sections in LaTeX - , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML - , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax - , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst - , writerWrapText :: Bool -- ^ Wrap text to line length - , writerColumns :: Int -- ^ Characters in a line (for text wrapping) - , writerLiterateHaskell :: Bool -- ^ Write as literate haskell - , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails - , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML - , writerSourceDirectory :: FilePath -- ^ Directory path of 1st source file - , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory - , writerCiteMethod :: CiteMethod -- ^ How to print cites - , writerBiblioFiles :: [FilePath] -- ^ Biblio files to use for citations - , writerHtml5 :: Bool -- ^ Produce HTML5 - , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show - , writerSlideLevel :: Maybe Int -- ^ Force header level of slides - , writerChapters :: Bool -- ^ Use "chapter" for top-level sects - , writerListings :: Bool -- ^ Use listings package for code - , writerHighlight :: Bool -- ^ Highlight source code - , writerHighlightStyle :: Style -- ^ Style to use for highlighting - , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown - , writerTeXLigatures :: Bool -- ^ Use tex ligatures quotes, dashes in latex - } deriving Show - -{-# DEPRECATED writerXeTeX "writerXeTeX no longer does anything" #-} --- | Default writer options. -defaultWriterOptions :: WriterOptions -defaultWriterOptions = - WriterOptions { writerStandalone = False - , writerTemplate = "" - , writerVariables = [] - , writerEPUBMetadata = "" - , writerTabStop = 4 - , writerTableOfContents = False - , writerSlideVariant = NoSlides - , writerIncremental = False - , writerXeTeX = False - , writerHTMLMathMethod = PlainMath - , writerIgnoreNotes = False - , writerNumberSections = False - , writerSectionDivs = False - , writerStrictMarkdown = False - , writerReferenceLinks = False - , writerWrapText = True - , writerColumns = 72 - , writerLiterateHaskell = False - , writerEmailObfuscation = JavascriptObfuscation - , writerIdentifierPrefix = "" - , writerSourceDirectory = "." - , writerUserDataDir = Nothing - , writerCiteMethod = Citeproc - , writerBiblioFiles = [] - , writerHtml5 = False - , writerBeamer = False - , writerSlideLevel = Nothing - , writerChapters = False - , writerListings = False - , writerHighlight = False - , writerHighlightStyle = pygments - , writerSetextHeaders = True - , writerTeXLigatures = True - } +-- | Render HTML tags. +renderTags' :: [Tag String] -> String +renderTags' = renderTagsOptions + renderOptions{ optMinimize = \x -> + let y = map toLower x + in y == "hr" || y == "br" || + y == "img" || y == "meta" || + y == "link" + , optRawTag = \x -> + let y = map toLower x + in y == "script" || y == "style" } -- -- File handling @@ -606,3 +511,15 @@ warn :: String -> IO () warn msg = do name <- getProgName UTF8.hPutStrLn stderr $ name ++ ": " ++ msg + +-- +-- Safe read +-- + +safeRead :: (Monad m, Read a) => String -> m a +safeRead s = case reads s of + (d,x):_ + | all isSpace x -> return d + _ -> fail $ "Could not read `" ++ s ++ "'" + + diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index dfdcd8e63..4f5ad54bd 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -30,7 +30,7 @@ A simple templating system with variable substitution and conditionals. Example: > renderTemplate [("name","Sam"),("salary","50,000")] $ -> "Hi, $name$. $if(salary)$You make $$$salary$.$else$No salary data.$endif$" +> "Hi, $name$. $if(salary)$You make $$$salary$.$else$No salary data.$endif$" > "Hi, John. You make $50,000." A slot for an interpolated variable is a variable name surrounded @@ -68,8 +68,8 @@ module Text.Pandoc.Templates ( renderTemplate , TemplateTarget , getDefaultTemplate ) where -import Text.ParserCombinators.Parsec -import Control.Monad (liftM, when, forM) +import Text.Parsec +import Control.Monad (liftM, when, forM, mzero) import System.FilePath import Data.List (intercalate, intersperse) #if MIN_VERSION_blaze_html(0,5,0) @@ -83,22 +83,26 @@ import Text.Pandoc.Shared (readDataFile) import qualified Control.Exception.Extensible as E (try, IOException) -- | Get default template for the specified writer. -getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first - -> String -- ^ Name of writer +getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first + -> String -- ^ Name of writer -> IO (Either E.IOException String) -getDefaultTemplate _ "native" = return $ Right "" -getDefaultTemplate _ "json" = return $ Right "" -getDefaultTemplate _ "docx" = return $ Right "" -getDefaultTemplate user "odt" = getDefaultTemplate user "opendocument" -getDefaultTemplate user "epub" = getDefaultTemplate user "html" getDefaultTemplate user writer = do - let format = takeWhile (/='+') writer -- strip off "+lhs" if present - let fname = "templates" </> "default" <.> format - E.try $ readDataFile user fname + let format = takeWhile (`notElem` "+-") writer -- strip off extensions + case format of + "native" -> return $ Right "" + "json" -> return $ Right "" + "docx" -> return $ Right "" + "epub" -> return $ Right "" + "odt" -> getDefaultTemplate user "opendocument" + "markdown_strict" -> getDefaultTemplate user "markdown" + "multimarkdown" -> getDefaultTemplate user "markdown" + "markdown_github" -> getDefaultTemplate user "markdown" + _ -> let fname = "templates" </> "default" <.> format + in E.try $ readDataFile user fname data TemplateState = TemplateState Int [(String,String)] -adjustPosition :: String -> GenParser Char TemplateState String +adjustPosition :: String -> Parsec [Char] TemplateState String adjustPosition str = do let lastline = takeWhile (/= '\n') $ reverse str updateState $ \(TemplateState pos x) -> @@ -108,18 +112,18 @@ adjustPosition str = do return str class TemplateTarget a where - toTarget :: String -> a + toTarget :: String -> a instance TemplateTarget String where toTarget = id -instance TemplateTarget ByteString where +instance TemplateTarget ByteString where toTarget = fromString instance TemplateTarget Html where toTarget = preEscapedString --- | Renders a template +-- | Renders a template renderTemplate :: TemplateTarget a => [(String,String)] -- ^ Assoc. list of values for variables -> String -- ^ Template @@ -132,21 +136,21 @@ renderTemplate vals templ = reservedWords :: [String] reservedWords = ["else","endif","for","endfor","sep"] -parseTemplate :: GenParser Char TemplateState [String] +parseTemplate :: Parsec [Char] TemplateState [String] parseTemplate = many $ (plaintext <|> escapedDollar <|> conditional <|> for <|> variable) >>= adjustPosition -plaintext :: GenParser Char TemplateState String +plaintext :: Parsec [Char] TemplateState String plaintext = many1 $ noneOf "$" -escapedDollar :: GenParser Char TemplateState String +escapedDollar :: Parsec [Char] TemplateState String escapedDollar = try $ string "$$" >> return "$" -skipEndline :: GenParser Char st () +skipEndline :: Parsec [Char] st () skipEndline = try $ skipMany (oneOf " \t") >> newline >> return () -conditional :: GenParser Char TemplateState String +conditional :: Parsec [Char] TemplateState String conditional = try $ do TemplateState pos vars <- getState string "$if(" @@ -170,7 +174,7 @@ conditional = try $ do then ifContents else elseContents -for :: GenParser Char TemplateState String +for :: Parsec [Char] TemplateState String for = try $ do TemplateState pos vars <- getState string "$for(" @@ -178,14 +182,14 @@ for = try $ do string ")$" -- if newline after the "for", then a newline after "endfor" will be swallowed multiline <- option False $ try $ skipEndline >> return True - let matches = filter (\(k,_) -> k == id') vars + let matches = filter (\(k,_) -> k == id') vars let indent = replicate pos ' ' contents <- forM matches $ \m -> do updateState $ \(TemplateState p v) -> TemplateState p (m:v) raw <- liftM concat $ lookAhead parseTemplate return $ intercalate ('\n':indent) $ lines $ raw ++ "\n" parseTemplate - sep <- option "" $ do try (string "$sep$") + sep <- option "" $ do try (string "$sep$") when multiline $ optional skipEndline liftM concat parseTemplate string "$endfor$" @@ -193,16 +197,16 @@ for = try $ do setState $ TemplateState pos vars return $ concat $ intersperse sep contents -ident :: GenParser Char TemplateState String +ident :: Parsec [Char] TemplateState String ident = do first <- letter rest <- many (alphaNum <|> oneOf "_-") let id' = first : rest if id' `elem` reservedWords - then pzero + then mzero else return id' -variable :: GenParser Char TemplateState String +variable :: Parsec [Char] TemplateState String variable = try $ do char '$' id' <- ident diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index e2959eae7..508ad30a9 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.UTF8 Copyright : Copyright (C) 2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 1913eb92b..e314cf70e 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -40,8 +40,8 @@ module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where import Text.Pandoc.Definition import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Shared -import Text.Pandoc.Parsing hiding (blankline) -import Text.ParserCombinators.Parsec ( runParser, GenParser ) +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (blankline, space) import Data.List ( isPrefixOf, intersperse, intercalate ) import Text.Pandoc.Pretty import Control.Monad.State @@ -93,7 +93,7 @@ escapeString = escapeStringUsing escs where escs = backslashEscapes "{" -- | Ordered list start parser for use in Para below. -olMarker :: GenParser Char ParserState Char +olMarker :: Parser [Char] ParserState Char olMarker = do (start, style', delim) <- anyOrderedListMarker if delim == Period && (style' == UpperAlpha || (style' == UpperRoman && diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 964320eb2..df11d79cc 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -20,10 +20,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.ConTeXt Copyright : Copyright (C) 2007-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha + Stability : alpha Portability : portable Conversion of 'Pandoc' format into ConTeXt. @@ -31,6 +31,7 @@ Conversion of 'Pandoc' format into ConTeXt. module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where import Text.Pandoc.Definition import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Generic (queryWith) import Text.Printf ( printf ) import Data.List ( intercalate ) @@ -39,23 +40,23 @@ import Text.Pandoc.Pretty import Text.Pandoc.Templates ( renderTemplate ) import Network.URI ( isURI, unEscapeString ) -data WriterState = +data WriterState = WriterState { stNextRef :: Int -- number of next URL reference , stOrderedListLevel :: Int -- level of ordered list , stOptions :: WriterOptions -- writer options } orderedListStyles :: [[Char]] -orderedListStyles = cycle ["[n]","[a]", "[r]", "[g]"] +orderedListStyles = cycle ["[n]","[a]", "[r]", "[g]"] -- | Convert Pandoc to ConTeXt. writeConTeXt :: WriterOptions -> Pandoc -> String -writeConTeXt options document = +writeConTeXt options document = let defaultWriterState = WriterState { stNextRef = 1 , stOrderedListLevel = 0 , stOptions = options - } - in evalState (pandocToConTeXt options document) defaultWriterState + } + in evalState (pandocToConTeXt options document) defaultWriterState pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String pandocToConTeXt options (Pandoc (Meta title authors date) blocks) = do @@ -120,7 +121,7 @@ elementToConTeXt opts (Sec level _ id' title' elements) = do return $ vcat (header' : innerContents) -- | Convert Pandoc block element to ConTeXt. -blockToConTeXt :: Block +blockToConTeXt :: Block -> State WriterState Doc blockToConTeXt Null = return empty blockToConTeXt (Plain lst) = inlineListToConTeXt lst @@ -128,7 +129,7 @@ blockToConTeXt (Para [Image txt (src,_)]) = do capt <- inlineListToConTeXt txt return $ blankline $$ "\\placefigure[here,nonumber]" <> braces capt <> braces ("\\externalfigure" <> brackets (text src)) <> blankline -blockToConTeXt (Para lst) = do +blockToConTeXt (Para lst) = do contents <- inlineListToConTeXt lst return $ contents <> blankline blockToConTeXt (BlockQuote lst) = do @@ -147,18 +148,18 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do let level = stOrderedListLevel st put $ st {stOrderedListLevel = level + 1} contents <- mapM listItemToConTeXt lst - put $ st {stOrderedListLevel = level} + put $ st {stOrderedListLevel = level} let start' = if start == 1 then "" else "start=" ++ show start let delim' = case delim of DefaultDelim -> "" - Period -> "stopper=." - OneParen -> "stopper=)" + Period -> "stopper=." + OneParen -> "stopper=)" TwoParens -> "left=(,stopper=)" - let width = maximum $ map length $ take (length contents) + let width = maximum $ map length $ take (length contents) (orderedListMarkers (start, style', delim)) let width' = (toEnum width + 1) / 2 - let width'' = if width' > (1.5 :: Double) - then "width=" ++ show width' ++ "em" + let width'' = if width' > (1.5 :: Double) + then "width=" ++ show width' ++ "em" else "" let specs2Items = filter (not . null) [start', delim', width''] let specs2 = if null specs2Items @@ -166,8 +167,8 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do else "[" ++ intercalate "," specs2Items ++ "]" let style'' = case style' of DefaultStyle -> orderedListStyles !! level - Decimal -> "[n]" - Example -> "[n]" + Decimal -> "[n]" + Example -> "[n]" LowerRoman -> "[r]" UpperRoman -> "[R]" LowerAlpha -> "[a]" @@ -182,21 +183,21 @@ blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline blockToConTeXt (Header level lst) = sectionHeader "" level lst blockToConTeXt (Table caption aligns widths heads rows) = do let colDescriptor colWidth alignment = (case alignment of - AlignLeft -> 'l' + AlignLeft -> 'l' AlignRight -> 'r' AlignCenter -> 'c' AlignDefault -> 'l'): if colWidth == 0 then "|" else ("p(" ++ printf "%.2f" colWidth ++ "\\textwidth)|") - let colDescriptors = "|" ++ (concat $ + let colDescriptors = "|" ++ (concat $ zipWith colDescriptor widths aligns) headers <- if all null heads then return empty - else liftM ($$ "\\HL") $ tableRowToConTeXt heads - captionText <- inlineListToConTeXt caption + else liftM ($$ "\\HL") $ tableRowToConTeXt heads + captionText <- inlineListToConTeXt caption let captionText' = if null caption then text "none" else captionText - rows' <- mapM tableRowToConTeXt rows + rows' <- mapM tableRowToConTeXt rows return $ "\\placetable[here]" <> braces captionText' $$ "\\starttable" <> brackets (text colDescriptors) $$ "\\HL" $$ headers $$ @@ -230,7 +231,7 @@ inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt lst -- | Convert inline element to ConTeXt inlineToConTeXt :: Inline -- ^ Inline to convert -> State WriterState Doc -inlineToConTeXt (Emph lst) = do +inlineToConTeXt (Emph lst) = do contents <- inlineListToConTeXt lst return $ braces $ "\\em " <> contents inlineToConTeXt (Strong lst) = do diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 1bcf99dcf..e696fc63e 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Docbook Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -31,6 +31,7 @@ module Text.Pandoc.Writers.Docbook ( writeDocbook) where import Text.Pandoc.Definition import Text.Pandoc.XML import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Readers.TeXMath import Data.List ( isPrefixOf, intercalate, isSuffixOf ) @@ -47,23 +48,23 @@ authorToDocbook opts name' = let name = render Nothing $ inlinesToDocbook opts name' in if ',' `elem` name then -- last name first - let (lastname, rest) = break (==',') name + let (lastname, rest) = break (==',') name firstname = removeLeadingSpace rest in - inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> - inTagsSimple "surname" (text $ escapeStringForXML lastname) + inTagsSimple "firstname" (text $ escapeStringForXML firstname) <> + inTagsSimple "surname" (text $ escapeStringForXML lastname) else -- last name last let namewords = words name - lengthname = length namewords + lengthname = length namewords (firstname, lastname) = case lengthname of - 0 -> ("","") + 0 -> ("","") 1 -> ("", name) n -> (intercalate " " (take (n-1) namewords), last namewords) - in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ - inTagsSimple "surname" (text $ escapeStringForXML lastname) + in inTagsSimple "firstname" (text $ escapeStringForXML firstname) $$ + inTagsSimple "surname" (text $ escapeStringForXML lastname) -- | Convert Pandoc document to string in Docbook format. writeDocbook :: WriterOptions -> Pandoc -> String -writeDocbook opts (Pandoc (Meta tit auths dat) blocks) = +writeDocbook opts (Pandoc (Meta tit auths dat) blocks) = let title = inlinesToDocbook opts tit authors = map (authorToDocbook opts) auths date = inlinesToDocbook opts dat @@ -92,7 +93,7 @@ writeDocbook opts (Pandoc (Meta tit auths dat) blocks) = -- | Convert an Element to Docbook. elementToDocbook :: WriterOptions -> Int -> Element -> Doc -elementToDocbook opts _ (Blk block) = blockToDocbook opts block +elementToDocbook opts _ (Blk block) = blockToDocbook opts block elementToDocbook opts lvl (Sec _ _num id' title elements) = -- Docbook doesn't allow sections with no content, so insert some if needed let elements' = if null elements @@ -115,10 +116,10 @@ plainToPara :: Block -> Block plainToPara (Plain x) = Para x plainToPara x = x --- | Convert a list of pairs of terms and definitions into a list of +-- | Convert a list of pairs of terms and definitions into a list of -- Docbook varlistentrys. deflistItemsToDocbook :: WriterOptions -> [([Inline],[[Block]])] -> Doc -deflistItemsToDocbook opts items = +deflistItemsToDocbook opts items = vcat $ map (\(term, defs) -> deflistItemToDocbook opts term defs) items -- | Convert a term and a list of blocks into a Docbook varlistentry. @@ -144,13 +145,16 @@ blockToDocbook _ Null = empty blockToDocbook _ (Header _ _) = empty -- should not occur after hierarchicalize blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst blockToDocbook opts (Para [Image txt (src,_)]) = - let capt = inlinesToDocbook opts txt + let alt = inlinesToDocbook opts txt + capt = if null txt + then empty + else inTagsSimple "title" alt in inTagsIndented "figure" $ - inTagsSimple "title" capt $$ + capt $$ (inTagsIndented "mediaobject" $ (inTagsIndented "imageobject" (selfClosingTag "imagedata" [("fileref",src)])) $$ - inTagsSimple "textobject" (inTagsSimple "phrase" capt)) + inTagsSimple "textobject" (inTagsSimple "phrase" alt)) blockToDocbook opts (Para lst) = inTagsIndented "para" $ inlinesToDocbook opts lst blockToDocbook opts (BlockQuote blocks) = @@ -167,9 +171,9 @@ blockToDocbook _ (CodeBlock (_,classes,_) str) = then [s] else languagesByExtension . map toLower $ s langs = concatMap langsFrom classes -blockToDocbook opts (BulletList lst) = - inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst -blockToDocbook _ (OrderedList _ []) = empty +blockToDocbook opts (BulletList lst) = + inTagsIndented "itemizedlist" $ listItemsToDocbook opts lst +blockToDocbook _ (OrderedList _ []) = empty blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = let attribs = case numstyle of DefaultStyle -> [] @@ -182,12 +186,12 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = items = if start == 1 then listItemsToDocbook opts (first:rest) else (inTags True "listitem" [("override",show start)] - (blocksToDocbook opts $ map plainToPara first)) $$ - listItemsToDocbook opts rest + (blocksToDocbook opts $ map plainToPara first)) $$ + listItemsToDocbook opts rest in inTags True "orderedlist" attribs items -blockToDocbook opts (DefinitionList lst) = - inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst -blockToDocbook _ (RawBlock "docbook" str) = text str -- raw XML block +blockToDocbook opts (DefinitionList lst) = + inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst +blockToDocbook _ (RawBlock "docbook" str) = text str -- raw XML block -- we allow html for compatibility with earlier versions of pandoc blockToDocbook _ (RawBlock "html" str) = text str -- raw XML block blockToDocbook _ (RawBlock _ _) = empty @@ -237,26 +241,26 @@ inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst -- | Convert an inline element to Docbook. inlineToDocbook :: WriterOptions -> Inline -> Doc -inlineToDocbook _ (Str str) = text $ escapeStringForXML str -inlineToDocbook opts (Emph lst) = +inlineToDocbook _ (Str str) = text $ escapeStringForXML str +inlineToDocbook opts (Emph lst) = inTagsSimple "emphasis" $ inlinesToDocbook opts lst -inlineToDocbook opts (Strong lst) = +inlineToDocbook opts (Strong lst) = inTags False "emphasis" [("role", "strong")] $ inlinesToDocbook opts lst -inlineToDocbook opts (Strikeout lst) = +inlineToDocbook opts (Strikeout lst) = inTags False "emphasis" [("role", "strikethrough")] $ inlinesToDocbook opts lst -inlineToDocbook opts (Superscript lst) = +inlineToDocbook opts (Superscript lst) = inTagsSimple "superscript" $ inlinesToDocbook opts lst -inlineToDocbook opts (Subscript lst) = +inlineToDocbook opts (Subscript lst) = inTagsSimple "subscript" $ inlinesToDocbook opts lst -inlineToDocbook opts (SmallCaps lst) = +inlineToDocbook opts (SmallCaps lst) = inTags False "emphasis" [("role", "smallcaps")] $ inlinesToDocbook opts lst -inlineToDocbook opts (Quoted _ lst) = +inlineToDocbook opts (Quoted _ lst) = inTagsSimple "quote" $ inlinesToDocbook opts lst inlineToDocbook opts (Cite _ lst) = - inlinesToDocbook opts lst -inlineToDocbook _ (Code _ str) = + inlinesToDocbook opts lst +inlineToDocbook _ (Code _ str) = inTagsSimple "literal" $ text (escapeStringForXML str) inlineToDocbook opts (Math t str) | isMathML (writerHTMLMathMethod opts) = @@ -282,7 +286,7 @@ inlineToDocbook _ Space = space inlineToDocbook opts (Link txt (src, _)) = if isPrefixOf "mailto:" src then let src' = drop 7 src - emailLink = inTagsSimple "email" $ text $ + emailLink = inTagsSimple "email" $ text $ escapeStringForXML $ src' in case txt of [Code _ s] | s == src' -> emailLink @@ -292,14 +296,14 @@ inlineToDocbook opts (Link txt (src, _)) = then inTags False "link" [("linkend", drop 1 src)] else inTags False "ulink" [("url", src)]) $ inlinesToDocbook opts txt -inlineToDocbook _ (Image _ (src, tit)) = +inlineToDocbook _ (Image _ (src, tit)) = let titleDoc = if null tit then empty else inTagsIndented "objectinfo" $ inTagsIndented "title" (text $ escapeStringForXML tit) in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] -inlineToDocbook opts (Note contents) = +inlineToDocbook opts (Note contents) = inTagsIndented "footnote" $ blocksToDocbook opts contents isMathML :: HTMLMathMethod -> Bool diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 396e7a482..05c9555c6 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -43,6 +43,7 @@ import Text.Pandoc.Generic import System.Directory import Text.Pandoc.ImageSize import Text.Pandoc.Shared hiding (Element) +import Text.Pandoc.Options import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Highlighting ( highlight ) import Text.Highlighting.Kate.Types () @@ -93,14 +94,13 @@ mknode s attrs = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) . node (unqual s) -- | Produce an Docx file from a Pandoc document. -writeDocx :: Maybe FilePath -- ^ Path specified by --reference-docx - -> WriterOptions -- ^ Writer options +writeDocx :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO B.ByteString -writeDocx mbRefDocx opts doc@(Pandoc (Meta tit auths date) _) = do +writeDocx opts doc@(Pandoc (Meta tit auths date) _) = do let datadir = writerUserDataDir opts refArchive <- liftM toArchive $ - case mbRefDocx of + case writerReferenceDocx opts of Just f -> B.readFile f Nothing -> do let defaultDocx = getDataFileName "reference.docx" >>= B.readFile @@ -543,7 +543,7 @@ inlineToOpenXML opts (SmallCaps lst) = inlineToOpenXML opts (Strikeout lst) = withTextProp (mknode "w:strike" [] ()) $ inlinesToOpenXML opts lst -inlineToOpenXML _ LineBreak = return [ mknode "w:br" [] () ] +inlineToOpenXML _ LineBreak = return [br] inlineToOpenXML _ (RawInline f str) | f == "openxml" = return [ x | Elem x <- parseXML str ] | otherwise = return [] @@ -562,16 +562,14 @@ inlineToOpenXML opts (Math DisplayMath str) = Left _ -> do fallback <- inlinesToOpenXML opts (readTeXMath str) return $ [br] ++ fallback ++ [br] - where br = mknode "w:br" [] () inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML _ (Code attrs str) = withTextProp (rStyle "VerbatimChar") $ case highlight formatOpenXML attrs str of - Nothing -> intercalate [mknode "w:br" [] ()] + Nothing -> intercalate [br] `fmap` (mapM formattedString $ lines str) Just h -> return h - where formatOpenXML _fmtOpts = intercalate [mknode "w:br" [] ()] . - map (map toHlTok) + where formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) toHlTok (toktype,tok) = mknode "w:r" [] [ mknode "w:rPr" [] [ rStyle $ show toktype ] @@ -669,3 +667,6 @@ inlineToOpenXML opts (Image alt (src, tit)) = do liftIO $ UTF8.hPutStrLn stderr $ "Could not find image `" ++ src ++ "', skipping..." inlinesToOpenXML opts alt + +br :: Element +br = mknode "w:r" [] [mknode "w:cr" [] () ] diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index b423f136f..46310e398 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -38,6 +38,7 @@ import Data.ByteString.Lazy.UTF8 ( fromString ) import Codec.Archive.Zip import Data.Time.Clock.POSIX import Text.Pandoc.Shared hiding ( Element ) +import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Generic import Control.Monad.State @@ -52,12 +53,10 @@ import Prelude hiding (catch) import Control.Exception (catch, SomeException) -- | Produce an EPUB file from a Pandoc document. -writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line - -> [FilePath] -- ^ Paths to fonts to embed - -> WriterOptions -- ^ Writer options +writeEPUB :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO B.ByteString -writeEPUB mbStylesheet fonts opts doc@(Pandoc meta _) = do +writeEPUB opts doc@(Pandoc meta _) = do epochtime <- floor `fmap` getPOSIXTime let mkEntry path content = toEntry path epochtime content let opts' = opts{ writerEmailObfuscation = NoObfuscation @@ -107,7 +106,7 @@ writeEPUB mbStylesheet fonts opts doc@(Pandoc meta _) = do -- handle fonts let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f - fontEntries <- mapM mkFontEntry fonts + fontEntries <- mapM mkFontEntry $ writerEpubFonts opts -- body pages let isH1 (Header 1 _) = True @@ -232,7 +231,7 @@ writeEPUB mbStylesheet fonts opts doc@(Pandoc meta _) = do let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple -- stylesheet - stylesheet <- case mbStylesheet of + stylesheet <- case writerEpubStylesheet opts of Just s -> return s Nothing -> readDataFile (writerUserDataDir opts) "epub.css" let stylesheetEntry = mkEntry "stylesheet.css" $ fromString stylesheet @@ -249,12 +248,14 @@ metadataElement metadataXML uuid lang title authors date mbCoverImage = let userNodes = parseXML metadataXML elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ - filter isDublinCoreElement $ onlyElems userNodes + filter isMetadataElement $ onlyElems userNodes dublinElements = ["contributor","coverage","creator","date", "description","format","identifier","language","publisher", "relation","rights","source","subject","title","type"] - isDublinCoreElement e = qPrefix (elName e) == Just "dc" && - qName (elName e) `elem` dublinElements + isMetadataElement e = (qPrefix (elName e) == Just "dc" && + qName (elName e) `elem` dublinElements) || + (qPrefix (elName e) == Nothing && + qName (elName e) `elem` ["link","meta"]) contains e n = not (null (findElements (QName n Nothing (Just "dc")) e)) newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++ [ unode "dc:language" lang | not (elt `contains` "language") ] ++ @@ -288,10 +289,8 @@ transformInlines _ sourceDir picsRef (Image lab (src,tit) : xs) = do transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do let writeHtmlInline opts z = removeTrailingSpace $ writeHtmlString opts $ Pandoc (Meta [] [] []) [Plain [z]] - mathml = writeHtmlInline defaultWriterOptions{ - writerHTMLMathMethod = MathML Nothing } x - fallback = writeHtmlInline defaultWriterOptions{ - writerHTMLMathMethod = PlainMath } x + mathml = writeHtmlInline def{writerHTMLMathMethod = MathML Nothing } x + fallback = writeHtmlInline def{writerHTMLMathMethod = PlainMath } x inOps = "<ops:switch xmlns:ops=\"http://www.idpf.org/2007/ops\">" ++ "<ops:case required-namespace=\"http://www.w3.org/1998/Math/MathML\">" ++ mathml ++ "</ops:case><ops:default>" ++ fallback ++ "</ops:default>" ++ @@ -312,9 +311,9 @@ ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . unEntity ('&':'#':xs) = let (ds,ys) = break (==';') xs rest = drop 1 ys - in case reads ('\'':'\\':ds ++ "'") of - ((x,_):_) -> x : unEntity rest - _ -> '&':'#':unEntity xs + in case safeRead ('\'':'\\':ds ++ "'") of + Just x -> x : unEntity rest + Nothing -> '&':'#':unEntity xs unEntity (x:xs) = x : unEntity xs imageTypeOf :: FilePath -> Maybe String diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs new file mode 100644 index 000000000..301d80c54 --- /dev/null +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -0,0 +1,616 @@ +{- +Copyright (c) 2011-2012, Sergey Astanin +All rights reserved. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | Conversion of 'Pandoc' documents to FB2 (FictionBook2) format. + +FictionBook is an XML-based e-book format. For more information see: +<http://www.fictionbook.org/index.php/Eng:XML_Schema_Fictionbook_2.1> + +-} +module Text.Pandoc.Writers.FB2 (writeFB2) where + +import Control.Monad.State (StateT, evalStateT, get, modify) +import Control.Monad.State (liftM, liftM2, liftIO) +import Data.ByteString.Base64 (encode) +import Data.Char (toUpper, toLower, isSpace, isAscii, isControl) +import Data.List (intersperse, intercalate, isPrefixOf) +import Data.Either (lefts, rights) +import Network.Browser (browse, request, setAllowRedirects, setOutHandler) +import Network.HTTP (catchIO_, getRequest, getHeaders, getResponseBody) +import Network.HTTP (lookupHeader, HeaderName(..), urlEncode) +import Network.URI (isURI, unEscapeString) +import System.FilePath (takeExtension) +import Text.XML.Light +import qualified Control.Exception as E +import qualified Data.ByteString as B +import qualified Text.XML.Light as X +import qualified Text.XML.Light.Cursor as XC + +import Text.Pandoc.Definition +import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def) +import Text.Pandoc.Shared (orderedListMarkers) +import Text.Pandoc.Generic (bottomUp) + +-- | Data to be written at the end of the document: +-- (foot)notes, URLs, references, images. +data FbRenderState = FbRenderState + { footnotes :: [ (Int, String, [Content]) ] -- ^ #, ID, text + , imagesToFetch :: [ (String, String) ] -- ^ filename, URL or path + , parentListMarker :: String -- ^ list marker of the parent ordered list + , parentBulletLevel :: Int -- ^ nesting level of the unordered list + , writerOptions :: WriterOptions + } deriving (Show) + +-- | FictionBook building monad. +type FBM = StateT FbRenderState IO + +newFB :: FbRenderState +newFB = FbRenderState { footnotes = [], imagesToFetch = [] + , parentListMarker = "", parentBulletLevel = 0 + , writerOptions = def } + +data ImageMode = NormalImage | InlineImage deriving (Eq) +instance Show ImageMode where + show NormalImage = "imageType" + show InlineImage = "inlineImageType" + +-- | Produce an FB2 document from a 'Pandoc' document. +writeFB2 :: WriterOptions -- ^ conversion options + -> Pandoc -- ^ document to convert + -> IO String -- ^ FictionBook2 document (not encoded yet) +writeFB2 opts (Pandoc meta blocks) = flip evalStateT newFB $ do + modify (\s -> s { writerOptions = opts { writerStandalone = True } }) + desc <- description meta + fp <- frontpage meta + secs <- renderSections 1 blocks + let body = el "body" $ fp ++ secs + notes <- renderFootnotes + (imgs,missing) <- liftM imagesToFetch get >>= \s -> liftIO (fetchImages s) + let body' = replaceImagesWithAlt missing body + let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs) + return $ xml_head ++ (showContent fb2_xml) + where + xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + fb2_attrs = + let xmlns = "http://www.gribuser.ru/xml/fictionbook/2.0" + xlink = "http://www.w3.org/1999/xlink" + in [ uattr "xmlns" xmlns + , attr ("xmlns", "l") xlink ] + -- + frontpage :: Meta -> FBM [Content] + frontpage meta' = do + t <- cMapM toXml . docTitle $ meta' + return $ + [ el "title" (el "p" t) + , el "annotation" (map (el "p" . cMap plain) + (docAuthors meta' ++ [docDate meta'])) + ] + description :: Meta -> FBM Content + description meta' = do + bt <- booktitle meta' + let as = authors meta' + dd <- docdate meta' + return $ el "description" + [ el "title-info" (bt ++ as ++ dd) + , el "document-info" [ el "program-used" "pandoc" ] -- FIXME: +version + ] + booktitle :: Meta -> FBM [Content] + booktitle meta' = do + t <- cMapM toXml . docTitle $ meta' + return $ if null t + then [] + else [ el "book-title" t ] + authors :: Meta -> [Content] + authors meta' = cMap author (docAuthors meta') + author :: [Inline] -> [Content] + author ss = + let ws = words . cMap plain $ ss + email = (el "email") `fmap` (take 1 $ filter ('@' `elem`) ws) + ws' = filter ('@' `notElem`) ws + names = case ws' of + (nickname:[]) -> [ el "nickname" nickname ] + (fname:lname:[]) -> [ el "first-name" fname + , el "last-name" lname ] + (fname:rest) -> [ el "first-name" fname + , el "middle-name" (concat . init $ rest) + , el "last-name" (last rest) ] + ([]) -> [] + in list $ el "author" (names ++ email) + docdate :: Meta -> FBM [Content] + docdate meta' = do + let ss = docDate meta' + d <- cMapM toXml ss + return $ if null d + then [] + else [el "date" d] + +-- | Divide the stream of blocks into sections and convert to XML +-- representation. +renderSections :: Int -> [Block] -> FBM [Content] +renderSections level blocks = do + let secs = splitSections level blocks + mapM (renderSection level) secs + +renderSection :: Int -> ([Inline], [Block]) -> FBM Content +renderSection level (ttl, body) = do + title <- if null ttl + then return [] + else return . list . el "title" . formatTitle $ ttl + content <- if (hasSubsections body) + then renderSections (level + 1) body + else cMapM blockToXml body + return $ el "section" (title ++ content) + where + hasSubsections = any isHeader + isHeader (Header _ _) = True + isHeader _ = False + +-- | Only <p> and <empty-line> are allowed within <title> in FB2. +formatTitle :: [Inline] -> [Content] +formatTitle inlines = + let lns = split isLineBreak inlines + lns' = map (el "p" . cMap plain) lns + in intersperse (el "empty-line" ()) lns' + +split :: (a -> Bool) -> [a] -> [[a]] +split _ [] = [] +split cond xs = let (b,a) = break cond xs + in (b:split cond (drop 1 a)) + +isLineBreak :: Inline -> Bool +isLineBreak LineBreak = True +isLineBreak _ = False + +-- | Divide the stream of block elements into sections: [(title, blocks)]. +splitSections :: Int -> [Block] -> [([Inline], [Block])] +splitSections level blocks = reverse $ revSplit (reverse blocks) + where + revSplit [] = [] + revSplit rblocks = + let (lastsec, before) = break sameLevel rblocks + (header, prevblocks) = + case before of + ((Header n title):prevblocks') -> + if n == level + then (title, prevblocks') + else ([], before) + _ -> ([], before) + in (header, reverse lastsec) : revSplit prevblocks + sameLevel (Header n _) = n == level + sameLevel _ = False + +-- | Make another FictionBook body with footnotes. +renderFootnotes :: FBM [Content] +renderFootnotes = do + fns <- footnotes `liftM` get + if null fns + then return [] -- no footnotes + else return . list $ + el "body" ([uattr "name" "notes"], map renderFN (reverse fns)) + where + renderFN (n, idstr, cs) = + let fn_texts = (el "title" (el "p" (show n))) : cs + in el "section" ([uattr "id" idstr], fn_texts) + +-- | Fetch images and encode them for the FictionBook XML. +-- Return image data and a list of hrefs of the missing images. +fetchImages :: [(String,String)] -> IO ([Content],[String]) +fetchImages links = do + imgs <- mapM (uncurry fetchImage) links + return $ (rights imgs, lefts imgs) + +-- | Fetch image data from disk or from network and make a <binary> XML section. +-- Return either (Left hrefOfMissingImage) or (Right xmlContent). +fetchImage :: String -> String -> IO (Either String Content) +fetchImage href link = do + mbimg <- + case (isURI link, readDataURI link) of + (True, Just (mime,_,True,base64)) -> + let mime' = map toLower mime + in if mime' == "image/png" || mime' == "image/jpeg" + then return (Just (mime',base64)) + else return Nothing + (True, Just _) -> return Nothing -- not base64-encoded + (True, Nothing) -> fetchURL link + (False, _) -> do + d <- nothingOnError $ B.readFile (unEscapeString link) + let t = case map toLower (takeExtension link) of + ".png" -> Just "image/png" + ".jpg" -> Just "image/jpeg" + ".jpeg" -> Just "image/jpeg" + ".jpe" -> Just "image/jpeg" + _ -> Nothing -- only PNG and JPEG are supported in FB2 + return $ liftM2 (,) t (liftM (toStr . encode) d) + case mbimg of + Just (imgtype, imgdata) -> do + return . Right $ el "binary" + ( [uattr "id" href + , uattr "content-type" imgtype] + , txt imgdata ) + _ -> return (Left ('#':href)) + where + nothingOnError :: (IO B.ByteString) -> (IO (Maybe B.ByteString)) + nothingOnError action = liftM Just action `E.catch` omnihandler + omnihandler :: E.SomeException -> IO (Maybe B.ByteString) + omnihandler _ = return Nothing + +-- | Extract mime type and encoded data from the Data URI. +readDataURI :: String -- ^ URI + -> Maybe (String,String,Bool,String) + -- ^ Maybe (mime,charset,isBase64,data) +readDataURI uri = + let prefix = "data:" + in if not (prefix `isPrefixOf` uri) + then Nothing + else + let rest = drop (length prefix) uri + meta = takeWhile (/= ',') rest -- without trailing ',' + uridata = drop (length meta + 1) rest + parts = split (== ';') meta + (mime,cs,enc)=foldr upd ("text/plain","US-ASCII",False) parts + in Just (mime,cs,enc,uridata) + where + upd str m@(mime,cs,enc) + | isMimeType str = (str,cs,enc) + | "charset=" `isPrefixOf` str = (mime,drop (length "charset=") str,enc) + | str == "base64" = (mime,cs,True) + | otherwise = m + +-- Without parameters like ;charset=...; see RFC 2045, 5.1 +isMimeType :: String -> Bool +isMimeType s = + case split (=='/') s of + [mtype,msubtype] -> + ((map toLower mtype) `elem` types + || "x-" `isPrefixOf` (map toLower mtype)) + && all valid mtype + && all valid msubtype + _ -> False + where + types = ["text","image","audio","video","application","message","multipart"] + valid c = isAscii c && not (isControl c) && not (isSpace c) && + c `notElem` "()<>@,;:\\\"/[]?=" + +-- | Fetch URL, return its Content-Type and binary data on success. +fetchURL :: String -> IO (Maybe (String, String)) +fetchURL url = do + flip catchIO_ (return Nothing) $ do + r <- browse $ do + setOutHandler (const (return ())) + setAllowRedirects True + liftM snd . request . getRequest $ url + let content_type = lookupHeader HdrContentType (getHeaders r) + content <- liftM (Just . toStr . encode . toBS) . getResponseBody $ Right r + return $ liftM2 (,) content_type content + where + +toBS :: String -> B.ByteString +toBS = B.pack . map (toEnum . fromEnum) + +toStr :: B.ByteString -> String +toStr = map (toEnum . fromEnum) . B.unpack + +footnoteID :: Int -> String +footnoteID i = "n" ++ (show i) + +linkID :: Int -> String +linkID i = "l" ++ (show i) + +-- | Convert a block-level Pandoc's element to FictionBook XML representation. +blockToXml :: Block -> FBM [Content] +blockToXml (Plain ss) = cMapM toXml ss -- FIXME: can lead to malformed FB2 +blockToXml (Para [Math DisplayMath formula]) = insertMath NormalImage formula +blockToXml (Para [img@(Image _ _)]) = insertImage NormalImage img +blockToXml (Para ss) = liftM (list . el "p") $ cMapM toXml ss +blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . + map (el "p" . el "code") . lines $ s +blockToXml (RawBlock _ s) = return . spaceBeforeAfter . + map (el "p" . el "code") . lines $ s +blockToXml (BlockQuote bs) = liftM (list . el "cite") $ cMapM blockToXml bs +blockToXml (OrderedList a bss) = do + state <- get + let pmrk = parentListMarker state + let markers = map ((pmrk ++ " ") ++) $ orderedListMarkers a + let mkitem mrk bs = do + modify (\s -> s { parentListMarker = mrk }) + itemtext <- cMapM blockToXml . paraToPlain $ bs + modify (\s -> s { parentListMarker = pmrk }) -- old parent marker + return . el "p" $ [ txt mrk, txt " " ] ++ itemtext + mapM (uncurry mkitem) (zip markers bss) +blockToXml (BulletList bss) = do + state <- get + let level = parentBulletLevel state + let pmrk = parentListMarker state + let prefix = replicate (length pmrk) ' ' + let bullets = ["\x2022", "\x25e6", "*", "\x2043", "\x2023"] + let mrk = prefix ++ bullets !! (level `mod` (length bullets)) + let mkitem bs = do + modify (\s -> s { parentBulletLevel = (level+1) }) + itemtext <- cMapM blockToXml . paraToPlain $ bs + modify (\s -> s { parentBulletLevel = level }) -- restore bullet level + return $ el "p" $ [ txt (mrk ++ " ") ] ++ itemtext + mapM mkitem bss +blockToXml (DefinitionList defs) = + cMapM mkdef defs + where + mkdef (term, bss) = do + def' <- cMapM (cMapM blockToXml . sep . paraToPlain . map indent) bss + t <- wrap "strong" term + return [ el "p" t, el "p" def' ] + sep blocks = + if all needsBreak blocks then + blocks ++ [Plain [LineBreak]] + else + blocks + needsBreak (Para _) = False + needsBreak (Plain ins) = LineBreak `notElem` ins + needsBreak _ = True +blockToXml (Header _ _) = -- should never happen, see renderSections + error "unexpected header in section text" +blockToXml HorizontalRule = return + [ el "empty-line" () + , el "p" (txt (replicate 10 '—')) + , el "empty-line" () ] +blockToXml (Table caption aligns _ headers rows) = do + hd <- mkrow "th" headers aligns + bd <- mapM (\r -> mkrow "td" r aligns) rows + c <- return . el "emphasis" =<< cMapM toXml caption + return [el "table" (hd : bd), el "p" c] + where + mkrow :: String -> [TableCell] -> [Alignment] -> FBM Content + mkrow tag cells aligns' = + (el "tr") `liftM` (mapM (mkcell tag) (zip cells aligns')) + -- + mkcell :: String -> (TableCell, Alignment) -> FBM Content + mkcell tag (cell, align) = do + cblocks <- cMapM blockToXml cell + return $ el tag ([align_attr align], cblocks) + -- + align_attr a = Attr (QName "align" Nothing Nothing) (align_str a) + align_str AlignLeft = "left" + align_str AlignCenter = "center" + align_str AlignRight = "right" + align_str AlignDefault = "left" +blockToXml Null = return [] + +-- Replace paragraphs with plain text and line break. +-- Necessary to simulate multi-paragraph lists in FB2. +paraToPlain :: [Block] -> [Block] +paraToPlain [] = [] +paraToPlain (Para inlines : rest) = + let p = (Plain (inlines ++ [LineBreak])) + in p : paraToPlain rest +paraToPlain (p:rest) = p : paraToPlain rest + +-- Simulate increased indentation level. Will not really work +-- for multi-line paragraphs. +indent :: Block -> Block +indent = indentBlock + where + -- indentation space + spacer :: String + spacer = replicate 4 ' ' + -- + indentBlock (Plain ins) = Plain ((Str spacer):ins) + indentBlock (Para ins) = Para ((Str spacer):ins) + indentBlock (CodeBlock a s) = + let s' = unlines . map (spacer++) . lines $ s + in CodeBlock a s' + indentBlock (BlockQuote bs) = BlockQuote (map indent bs) + indentBlock (Header l ins) = Header l (indentLines ins) + indentBlock everythingElse = everythingElse + -- indent every (explicit) line + indentLines :: [Inline] -> [Inline] + indentLines ins = let lns = split isLineBreak ins :: [[Inline]] + in intercalate [LineBreak] $ map ((Str spacer):) lns + +-- | Convert a Pandoc's Inline element to FictionBook XML representation. +toXml :: Inline -> FBM [Content] +toXml (Str s) = return [txt s] +toXml (Emph ss) = list `liftM` wrap "emphasis" ss +toXml (Strong ss) = list `liftM` wrap "strong" ss +toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss +toXml (Superscript ss) = list `liftM` wrap "sup" ss +toXml (Subscript ss) = list `liftM` wrap "sub" ss +toXml (SmallCaps ss) = cMapM toXml $ bottomUp (map toUpper) ss +toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific + inner <- cMapM toXml ss + return $ [txt "‘"] ++ inner ++ [txt "’"] +toXml (Quoted DoubleQuote ss) = do + inner <- cMapM toXml ss + return $ [txt "“"] ++ inner ++ [txt "”"] +toXml (Cite _ ss) = cMapM toXml ss -- FIXME: support citation styles +toXml (Code _ s) = return [el "code" s] +toXml Space = return [txt " "] +toXml LineBreak = return [el "empty-line" ()] +toXml (Math _ formula) = insertMath InlineImage formula +toXml (RawInline _ _) = return [] -- raw TeX and raw HTML are suppressed +toXml (Link text (url,ttl)) = do + fns <- footnotes `liftM` get + let n = 1 + length fns + let ln_id = linkID n + let ln_ref = list . el "sup" . txt $ "[" ++ show n ++ "]" + ln_text <- cMapM toXml text + let ln_desc = + let ttl' = dropWhile isSpace ttl + in if null ttl' + then list . el "p" $ el "code" url + else list . el "p" $ [ txt (ttl' ++ ": "), el "code" url ] + modify (\s -> s { footnotes = (n, ln_id, ln_desc) : fns }) + return $ ln_text ++ + [ el "a" + ( [ attr ("l","href") ('#':ln_id) + , uattr "type" "note" ] + , ln_ref) ] +toXml img@(Image _ _) = insertImage InlineImage img +toXml (Note bs) = do + fns <- footnotes `liftM` get + let n = 1 + length fns + let fn_id = footnoteID n + fn_desc <- cMapM blockToXml bs + modify (\s -> s { footnotes = (n, fn_id, fn_desc) : fns }) + let fn_ref = el "sup" . txt $ "[" ++ show n ++ "]" + return . list $ el "a" ( [ attr ("l","href") ('#':fn_id) + , uattr "type" "note" ] + , fn_ref ) + +insertMath :: ImageMode -> String -> FBM [Content] +insertMath immode formula = do + htmlMath <- return . writerHTMLMathMethod . writerOptions =<< get + case htmlMath of + WebTeX url -> do + let alt = [Code nullAttr formula] + let imgurl = url ++ urlEncode formula + let img = Image alt (imgurl, "") + insertImage immode img + _ -> return [el "code" formula] + +insertImage :: ImageMode -> Inline -> FBM [Content] +insertImage immode (Image alt (url,ttl)) = do + images <- imagesToFetch `liftM` get + let n = 1 + length images + let fname = "image" ++ show n + modify (\s -> s { imagesToFetch = (fname, url) : images }) + let ttlattr = case (immode, null ttl) of + (NormalImage, False) -> [ uattr "title" ttl ] + _ -> [] + return . list $ + el "image" $ + [ attr ("l","href") ('#':fname) + , attr ("l","type") (show immode) + , uattr "alt" (cMap plain alt) ] + ++ ttlattr +insertImage _ _ = error "unexpected inline instead of image" + +replaceImagesWithAlt :: [String] -> Content -> Content +replaceImagesWithAlt missingHrefs body = + let cur = XC.fromContent body + cur' = replaceAll cur + in XC.toTree . XC.root $ cur' + where + -- + replaceAll :: XC.Cursor -> XC.Cursor + replaceAll c = + let n = XC.current c + c' = if isImage n && isMissing n + then XC.modifyContent replaceNode c + else c + in case XC.nextDF c' of + (Just cnext) -> replaceAll cnext + Nothing -> c' -- end of document + -- + isImage :: Content -> Bool + isImage (Elem e) = (elName e) == (uname "image") + isImage _ = False + -- + isMissing (Elem img@(Element _ _ _ _)) = + let imgAttrs = elAttribs img + badAttrs = map (attr ("l","href")) missingHrefs + in any (`elem` imgAttrs) badAttrs + isMissing _ = False + -- + replaceNode :: Content -> Content + replaceNode n@(Elem img@(Element _ _ _ _)) = + let attrs = elAttribs img + alt = getAttrVal attrs (uname "alt") + imtype = getAttrVal attrs (qname "l" "type") + in case (alt, imtype) of + (Just alt', Just imtype') -> + if imtype' == show NormalImage + then el "p" alt' + else txt alt' + (Just alt', Nothing) -> txt alt' -- no type attribute + _ -> n -- don't replace if alt text is not found + replaceNode n = n + -- + getAttrVal :: [X.Attr] -> QName -> Maybe String + getAttrVal attrs name = + case filter ((name ==) . attrKey) attrs of + (a:_) -> Just (attrVal a) + _ -> Nothing + + +-- | Wrap all inlines with an XML tag (given its unqualified name). +wrap :: String -> [Inline] -> FBM Content +wrap tagname inlines = el tagname `liftM` cMapM toXml inlines + +-- " Create a singleton list. +list :: a -> [a] +list = (:[]) + +-- | Convert an 'Inline' to plaintext. +plain :: Inline -> String +plain (Str s) = s +plain (Emph ss) = concat (map plain ss) +plain (Strong ss) = concat (map plain ss) +plain (Strikeout ss) = concat (map plain ss) +plain (Superscript ss) = concat (map plain ss) +plain (Subscript ss) = concat (map plain ss) +plain (SmallCaps ss) = concat (map plain ss) +plain (Quoted _ ss) = concat (map plain ss) +plain (Cite _ ss) = concat (map plain ss) -- FIXME +plain (Code _ s) = s +plain Space = " " +plain LineBreak = "\n" +plain (Math _ s) = s +plain (RawInline _ s) = s +plain (Link text (url,_)) = concat (map plain text ++ [" <", url, ">"]) +plain (Image alt _) = concat (map plain alt) +plain (Note _) = "" -- FIXME + +-- | Create an XML element. +el :: (Node t) + => String -- ^ unqualified element name + -> t -- ^ node contents + -> Content -- ^ XML content +el name cs = Elem $ unode name cs + +-- | Put empty lines around content +spaceBeforeAfter :: [Content] -> [Content] +spaceBeforeAfter cs = + let emptyline = el "empty-line" () + in [emptyline] ++ cs ++ [emptyline] + +-- | Create a plain-text XML content. +txt :: String -> Content +txt s = Text $ CData CDataText s Nothing + +-- | Create an XML attribute with an unqualified name. +uattr :: String -> String -> Text.XML.Light.Attr +uattr name val = Attr (uname name) val + +-- | Create an XML attribute with a qualified name from given namespace. +attr :: (String, String) -> String -> Text.XML.Light.Attr +attr (ns, name) val = Attr (qname ns name) val + +-- | Unqualified name +uname :: String -> QName +uname name = QName name Nothing Nothing + +-- | Qualified name +qname :: String -> String -> QName +qname ns name = QName name Nothing (Just ns) + +-- | Abbreviation for 'concatMap'. +cMap :: (a -> [b]) -> [a] -> [b] +cMap = concatMap + +-- | Monadic equivalent of 'concatMap'. +cMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] +cMapM f xs = concat `liftM` mapM f xs
\ No newline at end of file diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index b8474ee3f..c6c4a8fd7 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -32,6 +32,7 @@ Conversion of 'Pandoc' documents to HTML. module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where import Text.Pandoc.Definition import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Templates import Text.Pandoc.Generic import Text.Pandoc.Readers.TeXMath @@ -272,7 +273,7 @@ elementToHtml slideLevel opts (Sec level num id' title' elements) = do -- title slides have no content of their own then filter isSec elements else elements - let header'' = if (writerStrictMarkdown opts || writerSectionDivs opts || + let header'' = if (writerSectionDivs opts || writerSlideVariant opts == S5Slides || slide) then header' else header' ! prefixedId opts id' @@ -378,13 +379,17 @@ blockToHtml _ Null = return mempty blockToHtml opts (Plain lst) = inlineListToHtml opts lst blockToHtml opts (Para [Image txt (s,tit)]) = do img <- inlineToHtml opts (Image txt (s,tit)) - capt <- inlineListToHtml opts txt + let tocapt = if writerHtml5 opts + then H5.figcaption + else H.p ! A.class_ "caption" + capt <- if null txt + then return mempty + else tocapt `fmap` inlineListToHtml opts txt return $ if writerHtml5 opts then H5.figure $ mconcat - [nl opts, img, H5.figcaption capt, nl opts] + [nl opts, img, capt, nl opts] else H.div ! A.class_ "figure" $ mconcat - [nl opts, img, H.p ! A.class_ "caption" $ capt, - nl opts] + [nl opts, img, capt, nl opts] blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst return $ H.p contents @@ -392,7 +397,7 @@ blockToHtml _ (RawBlock "html" str) = return $ preEscapedString str blockToHtml _ (RawBlock _ _) = return mempty blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do - let tolhs = writerLiterateHaskell opts && + let tolhs = isEnabled Ext_literate_haskell opts && any (\c -> map toLower c == "haskell") classes && any (\c -> map toLower c == "literate") classes classes' = if tolhs @@ -618,7 +623,7 @@ inlineToHtml opts inline = ! A.src (toValue $ url ++ urlEncode str) ! A.alt (toValue str) ! A.title (toValue str) - let brtag = if writerHtml5 opts then H5.br else H.br + let brtag = if writerHtml5 opts then H5.br else H.br return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag @@ -638,7 +643,7 @@ inlineToHtml opts inline = Left _ -> inlineListToHtml opts (readTeXMath str) >>= return . (H.span ! A.class_ "math") - MathJax _ -> return $ toHtml $ + MathJax _ -> return $ H.span ! A.class_ "math" $ toHtml $ case t of InlineMath -> "\\(" ++ str ++ "\\)" DisplayMath -> "\\[" ++ str ++ "\\]" diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 7beee2d42..abbbd4d01 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -32,6 +32,7 @@ module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Templates import Text.Printf ( printf ) import Network.URI ( isAbsoluteURI, unEscapeString ) @@ -265,10 +266,12 @@ blockToLaTeX :: Block -- ^ Block to convert blockToLaTeX Null = return empty blockToLaTeX (Plain lst) = inlineListToLaTeX lst blockToLaTeX (Para [Image txt (src,tit)]) = do - capt <- inlineListToLaTeX txt + capt <- if null txt + then return empty + else (\c -> "\\caption" <> braces c) `fmap` inlineListToLaTeX txt img <- inlineToLaTeX (Image txt (src,tit)) return $ "\\begin{figure}[htbp]" $$ "\\centering" $$ img $$ - ("\\caption{" <> capt <> char '}') $$ "\\end{figure}" + capt $$ "\\end{figure}" blockToLaTeX (Para lst) = do result <- inlineListToLaTeX lst return result @@ -287,7 +290,7 @@ blockToLaTeX (BlockQuote lst) = do blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do opts <- gets stOptions case () of - _ | writerLiterateHaskell opts && "haskell" `elem` classes && + _ | isEnabled Ext_literate_haskell opts && "haskell" `elem` classes && "literate" `elem` classes -> lhsCodeBlock | writerListings opts -> listingsCodeBlock | writerHighlight opts && not (null classes) -> highlightedCodeBlock diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index c481e6c87..bececde25 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -17,9 +17,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Writers.Man + Module : Text.Pandoc.Writers.Man Copyright : Copyright (C) 2007-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -32,6 +32,7 @@ module Text.Pandoc.Writers.Man ( writeMan) where import Text.Pandoc.Definition import Text.Pandoc.Templates import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Readers.TeXMath import Text.Printf ( printf ) import Data.List ( isPrefixOf, intersperse, intercalate ) @@ -44,21 +45,21 @@ data WriterState = WriterState { stNotes :: Notes -- | Convert Pandoc to Man. writeMan :: WriterOptions -> Pandoc -> String -writeMan opts document = evalState (pandocToMan opts document) (WriterState [] False) +writeMan opts document = evalState (pandocToMan opts document) (WriterState [] False) -- | Return groff man representation of document. pandocToMan :: WriterOptions -> Pandoc -> State WriterState String pandocToMan opts (Pandoc (Meta title authors date) blocks) = do titleText <- inlineListToMan opts title authors' <- mapM (inlineListToMan opts) authors - date' <- inlineListToMan opts date + date' <- inlineListToMan opts date let colwidth = if writerWrapText opts then Just $ writerColumns opts else Nothing let render' = render colwidth let (cmdName, rest) = break (== ' ') $ render' titleText let (title', section) = case reverse cmdName of - (')':d:'(':xs) | d `elem` ['0'..'9'] -> + (')':d:'(':xs) | d `elem` ['0'..'9'] -> (text (reverse xs), char d) xs -> (text (reverse xs), doubleQuotes empty) let description = hsep $ @@ -86,7 +87,7 @@ notesToMan :: WriterOptions -> [[Block]] -> State WriterState Doc notesToMan opts notes = if null notes then return empty - else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>= + else mapM (\(num, note) -> noteToMan opts num note) (zip [1..] notes) >>= return . (text ".SH NOTES" $$) . vcat -- | Return man representation of a note. @@ -94,7 +95,7 @@ noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc noteToMan opts num note = do contents <- blockListToMan opts note let marker = cr <> text ".SS " <> brackets (text (show num)) - return $ marker $$ contents + return $ marker $$ contents -- | Association list of characters to escape. manEscapes :: [(Char, String)] @@ -104,7 +105,7 @@ manEscapes = [ ('\160', "\\ ") , ('\x2014', "\\[em]") , ('\x2013', "\\[en]") , ('\x2026', "\\&...") - ] ++ backslashEscapes "@\\" + ] ++ backslashEscapes "-@\\" -- | Escape special characters for Man. escapeString :: String -> String @@ -113,7 +114,7 @@ escapeString = escapeStringUsing manEscapes -- | Escape a literal (code) section for Man. escapeCode :: String -> String escapeCode = concat . intersperse "\n" . map escapeLine . lines where - escapeLine codeline = + escapeLine codeline = case escapeStringUsing (manEscapes ++ backslashEscapes "\t ") codeline of a@('.':_) -> "\\&" ++ a b -> b @@ -150,14 +151,14 @@ splitSentences xs = -- | Convert Pandoc block element to man. blockToMan :: WriterOptions -- ^ Options -> Block -- ^ Block element - -> State WriterState Doc + -> State WriterState Doc blockToMan _ Null = return empty -blockToMan opts (Plain inlines) = +blockToMan opts (Plain inlines) = liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines blockToMan opts (Para inlines) = do contents <- liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines - return $ text ".PP" $$ contents + return $ text ".PP" $$ contents blockToMan _ (RawBlock "man" str) = return $ text str blockToMan _ (RawBlock _ _) = return empty blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *" @@ -166,7 +167,7 @@ blockToMan opts (Header level inlines) = do let heading = case level of 1 -> ".SH " _ -> ".SS " - return $ text heading <> contents + return $ text heading <> contents blockToMan _ (CodeBlock _ str) = return $ text ".IP" $$ text ".nf" $$ @@ -174,10 +175,10 @@ blockToMan _ (CodeBlock _ str) = return $ text (escapeCode str) $$ text "\\f[]" $$ text ".fi" -blockToMan opts (BlockQuote blocks) = do +blockToMan opts (BlockQuote blocks) = do contents <- blockListToMan opts blocks return $ text ".RS" $$ contents $$ text ".RE" -blockToMan opts (Table caption alignments widths headers rows) = +blockToMan opts (Table caption alignments widths headers rows) = let aligncode AlignLeft = "l" aligncode AlignRight = "r" aligncode AlignCenter = "c" @@ -190,53 +191,53 @@ blockToMan opts (Table caption alignments widths headers rows) = else map (printf "w(%0.2fn)" . (70 *)) widths -- 78n default width - 8n indent = 70n let coldescriptions = text $ intercalate " " - (zipWith (\align width -> aligncode align ++ width) + (zipWith (\align width -> aligncode align ++ width) alignments iwidths) ++ "." colheadings <- mapM (blockListToMan opts) headers - let makeRow cols = text "T{" $$ - (vcat $ intersperse (text "T}@T{") cols) $$ + let makeRow cols = text "T{" $$ + (vcat $ intersperse (text "T}@T{") cols) $$ text "T}" let colheadings' = if all null headers then empty else makeRow colheadings $$ char '_' - body <- mapM (\row -> do + body <- mapM (\row -> do cols <- mapM (blockListToMan opts) row return $ makeRow cols) rows - return $ text ".PP" $$ caption' $$ - text ".TS" $$ text "tab(@);" $$ coldescriptions $$ + return $ text ".PP" $$ caption' $$ + text ".TS" $$ text "tab(@);" $$ coldescriptions $$ colheadings' $$ vcat body $$ text ".TE" blockToMan opts (BulletList items) = do contents <- mapM (bulletListItemToMan opts) items - return (vcat contents) + return (vcat contents) blockToMan opts (OrderedList attribs items) = do - let markers = take (length items) $ orderedListMarkers attribs + let markers = take (length items) $ orderedListMarkers attribs let indent = 1 + (maximum $ map length markers) contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $ - zip markers items + zip markers items return (vcat contents) -blockToMan opts (DefinitionList items) = do +blockToMan opts (DefinitionList items) = do contents <- mapM (definitionListItemToMan opts) items return (vcat contents) -- | Convert bullet list item (list of blocks) to man. bulletListItemToMan :: WriterOptions -> [Block] -> State WriterState Doc bulletListItemToMan _ [] = return empty -bulletListItemToMan opts ((Para first):rest) = +bulletListItemToMan opts ((Para first):rest) = bulletListItemToMan opts ((Plain first):rest) bulletListItemToMan opts ((Plain first):rest) = do - first' <- blockToMan opts (Plain first) + first' <- blockToMan opts (Plain first) rest' <- blockListToMan opts rest let first'' = text ".IP \\[bu] 2" $$ first' let rest'' = if null rest then empty else text ".RS 2" $$ rest' $$ text ".RE" - return (first'' $$ rest'') + return (first'' $$ rest'') bulletListItemToMan opts (first:rest) = do first' <- blockToMan opts first rest' <- blockListToMan opts rest return $ text "\\[bu] .RS 2" $$ first' $$ rest' $$ text ".RE" - + -- | Convert ordered list item (a list of blocks) to man. orderedListItemToMan :: WriterOptions -- ^ options -> String -- ^ order marker for list item @@ -244,7 +245,7 @@ orderedListItemToMan :: WriterOptions -- ^ options -> [Block] -- ^ list item (list of blocks) -> State WriterState Doc orderedListItemToMan _ _ _ [] = return empty -orderedListItemToMan opts num indent ((Para first):rest) = +orderedListItemToMan opts num indent ((Para first):rest) = orderedListItemToMan opts num indent ((Plain first):rest) orderedListItemToMan opts num indent (first:rest) = do first' <- blockToMan opts first @@ -254,17 +255,17 @@ orderedListItemToMan opts num indent (first:rest) = do let rest'' = if null rest then empty else text ".RS 4" $$ rest' $$ text ".RE" - return $ first'' $$ rest'' + return $ first'' $$ rest'' -- | Convert definition list item (label, list of blocks) to man. definitionListItemToMan :: WriterOptions - -> ([Inline],[[Block]]) + -> ([Inline],[[Block]]) -> State WriterState Doc definitionListItemToMan opts (label, defs) = do labelText <- inlineListToMan opts label - contents <- if null defs + contents <- if null defs then return empty - else liftM vcat $ forM defs $ \blocks -> do + else liftM vcat $ forM defs $ \blocks -> do let (first, rest) = case blocks of ((Para x):y) -> (Plain x,y) (x:y) -> (x,y) @@ -278,7 +279,7 @@ definitionListItemToMan opts (label, defs) = do -- | Convert list of Pandoc block elements to man. blockListToMan :: WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> State WriterState Doc + -> State WriterState Doc blockListToMan opts blocks = mapM (blockToMan opts) blocks >>= (return . vcat) @@ -292,7 +293,7 @@ inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) -- | Convert Pandoc inline element to man. inlineToMan :: WriterOptions -> Inline -> State WriterState Doc -inlineToMan opts (Emph lst) = do +inlineToMan opts (Emph lst) = do contents <- inlineListToMan opts lst return $ text "\\f[I]" <> contents <> text "\\f[]" inlineToMan opts (Strong lst) = do @@ -333,16 +334,16 @@ inlineToMan opts (Link txt (src, _)) = do let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src return $ case txt of [Code _ s] - | s == srcSuffix -> char '<' <> text srcSuffix <> char '>' + | s == srcSuffix -> char '<' <> text srcSuffix <> char '>' _ -> linktext <> text " (" <> text src <> char ')' inlineToMan opts (Image alternate (source, tit)) = do - let txt = if (null alternate) || (alternate == [Str ""]) || + let txt = if (null alternate) || (alternate == [Str ""]) || (alternate == [Str source]) -- to prevent autolinks then [Str "image"] else alternate - linkPart <- inlineToMan opts (Link txt (source, tit)) + linkPart <- inlineToMan opts (Link txt (source, tit)) return $ char '[' <> text "IMAGE: " <> linkPart <> char ']' -inlineToMan _ (Note contents) = do +inlineToMan _ (Note contents) = do -- add to notes in state modify $ \st -> st{ stNotes = contents : stNotes st } notes <- liftM stNotes get diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 9cbcaeb47..d88419feb 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings, TupleSections #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -18,9 +18,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Writers.Markdown + Module : Text.Pandoc.Writers.Markdown Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -35,11 +35,15 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Shared -import Text.Pandoc.Parsing hiding (blankline) -import Text.ParserCombinators.Parsec ( runParser, GenParser ) +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (blankline, char, space) import Data.List ( group, isPrefixOf, find, intersperse, transpose ) import Text.Pandoc.Pretty import Control.Monad.State +import qualified Data.Set as Set +import Text.Pandoc.Writers.HTML (writeHtmlString) +import Text.Pandoc.Readers.TeXMath (readTeXMath) +import Text.HTML.TagSoup (renderTags, parseTags, isTagText, Tag(..)) type Notes = [[Block]] type Refs = [([Inline], Target)] @@ -49,7 +53,7 @@ data WriterState = WriterState { stNotes :: Notes -- | Convert Pandoc to Markdown. writeMarkdown :: WriterOptions -> Pandoc -> String -writeMarkdown opts document = +writeMarkdown opts document = evalState (pandocToMarkdown opts document) WriterState{ stNotes = [] , stRefs = [] , stPlain = False } @@ -58,7 +62,9 @@ writeMarkdown opts document = -- pictures, or inline formatting). writePlain :: WriterOptions -> Pandoc -> String writePlain opts document = - evalState (pandocToMarkdown opts{writerStrictMarkdown = True} + evalState (pandocToMarkdown opts{ + writerExtensions = Set.delete Ext_escaped_line_breaks $ + writerExtensions opts } document') WriterState{ stNotes = [] , stRefs = [] , stPlain = True } @@ -81,15 +87,41 @@ plainify = bottomUp go go (Cite _ cits) = SmallCaps cits go x = x +pandocTitleBlock :: Doc -> [Doc] -> Doc -> Doc +pandocTitleBlock tit auths dat = + hang 2 (text "% ") tit <> cr <> + hang 2 (text "% ") (hcat (intersperse (text "; ") auths)) <> cr <> + hang 2 (text "% ") dat <> cr + +mmdTitleBlock :: Doc -> [Doc] -> Doc -> Doc +mmdTitleBlock tit auths dat = + hang 8 (text "Title: ") tit <> cr <> + hang 8 (text "Author: ") (hcat (intersperse (text "; ") auths)) <> cr <> + hang 8 (text "Date: ") dat <> cr + +plainTitleBlock :: Doc -> [Doc] -> Doc -> Doc +plainTitleBlock tit auths dat = + tit <> cr <> + (hcat (intersperse (text "; ") auths)) <> cr <> + dat <> cr + -- | Return markdown representation of document. pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState String pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do title' <- inlineListToMarkdown opts title authors' <- mapM (inlineListToMarkdown opts) authors date' <- inlineListToMarkdown opts date - let titleblock = not $ null title && null authors && null date + isPlain <- gets stPlain + let titleblock = case True of + _ | isPlain -> + plainTitleBlock title' authors' date' + | isEnabled Ext_pandoc_title_block opts -> + pandocTitleBlock title' authors' date' + | isEnabled Ext_mmd_title_block opts -> + mmdTitleBlock title' authors' date' + | otherwise -> empty let headerBlocks = filter isHeaderBlock blocks - let toc = if writerTableOfContents opts + let toc = if writerTableOfContents opts then tableOfContents opts headerBlocks else empty body <- blockListToMarkdown opts blocks @@ -106,11 +138,9 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do let context = writerVariables opts ++ [ ("toc", render colwidth toc) , ("body", main) - , ("title", render colwidth title') - , ("date", render colwidth date') ] ++ - [ ("titleblock", "yes") | titleblock ] ++ - [ ("author", render colwidth a) | a <- authors' ] + [ ("titleblock", render colwidth titleblock) + | not (null title && null authors && null date) ] if writerStandalone opts then return $ renderTemplate context $ writerTemplate opts else return main @@ -119,9 +149,9 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do refsToMarkdown :: WriterOptions -> Refs -> State WriterState Doc refsToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat --- | Return markdown representation of a reference key. -keyToMarkdown :: WriterOptions - -> ([Inline], (String, String)) +-- | Return markdown representation of a reference key. +keyToMarkdown :: WriterOptions + -> ([Inline], (String, String)) -> State WriterState Doc keyToMarkdown opts (label, (src, tit)) = do label' <- inlineListToMarkdown opts label @@ -133,7 +163,7 @@ keyToMarkdown opts (label, (src, tit)) = do -- | Return markdown representation of notes. notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc -notesToMarkdown opts notes = +notesToMarkdown opts notes = mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>= return . vsep @@ -142,12 +172,16 @@ noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc noteToMarkdown opts num blocks = do contents <- blockListToMarkdown opts blocks let num' = text $ show num - let marker = text "[^" <> num' <> text "]:" + let marker = if isEnabled Ext_footnotes opts + then text "[^" <> num' <> text "]:" + else text "[" <> num' <> text "]" let markerSize = 4 + offset num' let spacer = case writerTabStop opts - markerSize of n | n > 0 -> text $ replicate n ' ' _ -> text " " - return $ hang (writerTabStop opts) (marker <> spacer) contents + return $ if isEnabled Ext_footnotes opts + then hang (writerTabStop opts) (marker <> spacer) contents + else marker <> spacer <> contents -- | Escape special characters for Markdown. escapeString :: String -> String @@ -155,7 +189,7 @@ escapeString = escapeStringUsing markdownEscapes where markdownEscapes = backslashEscapes "\\`*_$<>#~^" -- | Construct table of contents from list of header blocks. -tableOfContents :: WriterOptions -> [Block] -> Doc +tableOfContents :: WriterOptions -> [Block] -> Doc tableOfContents opts headers = let opts' = opts { writerIgnoreNotes = True } contents = BulletList $ map elementToListItem $ hierarchicalize headers @@ -166,7 +200,7 @@ tableOfContents opts headers = -- | Converts an Element to a list item for a table of contents, elementToListItem :: Element -> [Block] elementToListItem (Blk _) = [] -elementToListItem (Sec _ _ _ headerText subsecs) = [Plain headerText] ++ +elementToListItem (Sec _ _ _ headerText subsecs) = [Plain headerText] ++ if null subsecs then [] else [BulletList $ map elementToListItem subsecs] @@ -188,9 +222,9 @@ attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] <> "=\"" <> text v <> "\"") ks -- | Ordered list start parser for use in Para below. -olMarker :: GenParser Char ParserState Char +olMarker :: Parser [Char] ParserState Char olMarker = do (start, style', delim) <- anyOrderedListMarker - if delim == Period && + if delim == Period && (style' == UpperAlpha || (style' == UpperRoman && start `elem` [1, 5, 10, 50, 100, 500, 1000])) then spaceChar >> spaceChar @@ -206,7 +240,7 @@ beginsWithOrderedListMarker str = -- | Convert Pandoc block element to markdown. blockToMarkdown :: WriterOptions -- ^ Options -> Block -- ^ Block element - -> State WriterState Doc + -> State WriterState Doc blockToMarkdown _ Null = return empty blockToMarkdown opts (Plain inlines) = do contents <- inlineListToMarkdown opts inlines @@ -215,14 +249,21 @@ blockToMarkdown opts (Para inlines) = do contents <- inlineListToMarkdown opts inlines -- escape if para starts with ordered list marker st <- get - let esc = if (not (writerStrictMarkdown opts)) && + let esc = if isEnabled Ext_all_symbols_escapable opts && not (stPlain st) && beginsWithOrderedListMarker (render Nothing contents) then text "\x200B" -- zero-width space, a hack else empty return $ esc <> contents <> blankline -blockToMarkdown _ (RawBlock f str) - | f == "html" || f == "latex" || f == "tex" || f == "markdown" = do +blockToMarkdown opts (RawBlock f str) + | f == "html" = do + st <- get + if stPlain st + then return empty + else return $ if isEnabled Ext_markdown_attribute opts + then text (addMarkdownAttribute str) <> text "\n" + else text str <> text "\n" + | f == "latex" || f == "tex" || f == "markdown" = do st <- get if stPlain st then return empty @@ -243,88 +284,148 @@ blockToMarkdown opts (Header level inlines) = do contents <> cr <> text (replicate (offset contents) '-') <> blankline -- ghc interprets '#' characters in column 1 as linenum specifiers. - _ | stPlain st || writerLiterateHaskell opts -> + _ | stPlain st || isEnabled Ext_literate_haskell opts -> contents <> blankline _ -> text (replicate level '#') <> space <> contents <> blankline blockToMarkdown opts (CodeBlock (_,classes,_) str) | "haskell" `elem` classes && "literate" `elem` classes && - writerLiterateHaskell opts = + isEnabled Ext_literate_haskell opts = return $ prefixed "> " (text str) <> blankline blockToMarkdown opts (CodeBlock attribs str) = return $ - if writerStrictMarkdown opts || attribs == nullAttr - then nest (writerTabStop opts) (text str) <> blankline - else -- use delimited code block - (tildes <> space <> attrs <> cr <> text str <> - cr <> tildes) <> blankline - where tildes = text "~~~~" - attrs = attrsToMarkdown attribs + case attribs of + x | x /= nullAttr && isEnabled Ext_fenced_code_blocks opts -> + tildes <> space <> attrs <> cr <> text str <> + cr <> tildes <> blankline + (_,(cls:_),_) | isEnabled Ext_backtick_code_blocks opts -> + backticks <> space <> text cls <> cr <> text str <> + cr <> backticks <> blankline + _ -> nest (writerTabStop opts) (text str) <> blankline + where tildes = text $ case [ln | ln <- lines str, all (=='~') ln] of + [] -> "~~~~" + xs -> case maximum $ map length xs of + n | n < 3 -> "~~~~" + | otherwise -> replicate (n+1) '~' + backticks = text "```" + attrs = if isEnabled Ext_fenced_code_attributes opts + then attrsToMarkdown attribs + else empty blockToMarkdown opts (BlockQuote blocks) = do st <- get -- if we're writing literate haskell, put a space before the bird tracks -- so they won't be interpreted as lhs... - let leader = if writerLiterateHaskell opts + let leader = if isEnabled Ext_literate_haskell opts then " > " else if stPlain st then " " else "> " contents <- blockListToMarkdown opts blocks return $ (prefixed leader contents) <> blankline -blockToMarkdown opts (Table caption aligns widths headers rows) = do +blockToMarkdown opts t@(Table caption aligns widths headers rows) = do caption' <- inlineListToMarkdown opts caption - let caption'' = if null caption + let caption'' = if null caption || not (isEnabled Ext_table_captions opts) then empty else blankline <> ": " <> caption' <> blankline - headers' <- mapM (blockListToMarkdown opts) headers + rawHeaders <- mapM (blockListToMarkdown opts) headers + rawRows <- mapM (mapM (blockListToMarkdown opts)) rows + let isSimple = all (==0) widths + (nst,tbl) <- case isSimple of + True | isEnabled Ext_simple_tables opts -> fmap (nest 2,) $ + pandocTable opts (all null headers) aligns widths + rawHeaders rawRows + | isEnabled Ext_pipe_tables opts -> fmap (id,) $ + pipeTable (all null headers) aligns rawHeaders rawRows + | otherwise -> fmap (id,) $ + return $ text $ writeHtmlString def + $ Pandoc (Meta [] [] []) [t] + False | isEnabled Ext_multiline_tables opts -> fmap (nest 2,) $ + pandocTable opts (all null headers) aligns widths + rawHeaders rawRows + | otherwise -> fmap (id,) $ + return $ text $ writeHtmlString def + $ Pandoc (Meta [] [] []) [t] + return $ nst $ tbl $$ blankline $$ caption'' $$ blankline +blockToMarkdown opts (BulletList items) = do + contents <- mapM (bulletListItemToMarkdown opts) items + return $ cat contents <> blankline +blockToMarkdown opts (OrderedList (start,sty,delim) items) = do + let start' = if isEnabled Ext_startnum opts then start else 1 + let sty' = if isEnabled Ext_fancy_lists opts then sty else DefaultStyle + let delim' = if isEnabled Ext_fancy_lists opts then delim else DefaultDelim + let attribs = (start', sty', delim') + let markers = orderedListMarkers attribs + let markers' = map (\m -> if length m < 3 + then m ++ replicate (3 - length m) ' ' + else m) markers + contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ + zip markers' items + return $ cat contents <> blankline +blockToMarkdown opts (DefinitionList items) = do + contents <- mapM (definitionListItemToMarkdown opts) items + return $ cat contents <> blankline + +addMarkdownAttribute :: String -> String +addMarkdownAttribute s = + case span isTagText $ reverse $ parseTags s of + (xs,(TagOpen t attrs:rest)) -> + renderTags $ reverse rest ++ (TagOpen t attrs' : reverse xs) + where attrs' = ("markdown","1"):[(x,y) | (x,y) <- attrs, + x /= "markdown"] + _ -> s + +pipeTable :: Bool -> [Alignment] -> [Doc] -> [[Doc]] -> State WriterState Doc +pipeTable headless aligns rawHeaders rawRows = do + let torow cs = nowrap $ text "|" <> + hcat (intersperse (text "|") $ map chomp cs) <> text "|" + let toborder (a, h) = let wid = max (offset h) 3 + in text $ case a of + AlignLeft -> ':':replicate (wid - 1) '-' + AlignCenter -> ':':replicate (wid - 2) '-' ++ ":" + AlignRight -> replicate (wid - 1) '-' ++ ":" + AlignDefault -> replicate wid '-' + let header = if headless then empty else torow rawHeaders + let border = torow $ map toborder $ zip aligns rawHeaders + let body = vcat $ map torow rawRows + return $ header $$ border $$ body + +pandocTable :: WriterOptions -> Bool -> [Alignment] -> [Double] + -> [Doc] -> [[Doc]] -> State WriterState Doc +pandocTable opts headless aligns widths rawHeaders rawRows = do + let isSimple = all (==0) widths let alignHeader alignment = case alignment of AlignLeft -> lblock AlignCenter -> cblock AlignRight -> rblock AlignDefault -> lblock - rawRows <- mapM (mapM (blockListToMarkdown opts)) rows - let isSimple = all (==0) widths let numChars = maximum . map offset - let widthsInChars = - if isSimple - then map ((+2) . numChars) $ transpose (headers' : rawRows) - else map (floor . (fromIntegral (writerColumns opts) *)) widths + let widthsInChars = if isSimple + then map ((+2) . numChars) + $ transpose (rawHeaders : rawRows) + else map + (floor . (fromIntegral (writerColumns opts) *)) + widths let makeRow = hcat . intersperse (lblock 1 (text " ")) . (zipWith3 alignHeader aligns widthsInChars) let rows' = map makeRow rawRows - let head' = makeRow headers' + let head' = makeRow rawHeaders let maxRowHeight = maximum $ map height (head':rows') let underline = cat $ intersperse (text " ") $ map (\width -> text (replicate width '-')) widthsInChars let border = if maxRowHeight > 1 then text (replicate (sum widthsInChars + length widthsInChars - 1) '-') - else if all null headers + else if headless then underline else empty - let head'' = if all null headers + let head'' = if headless then empty else border <> cr <> head' let body = if maxRowHeight > 1 then vsep rows' else vcat rows' - let bottom = if all null headers + let bottom = if headless then underline else border - return $ nest 2 $ head'' $$ underline $$ body $$ - bottom $$ blankline $$ caption'' $$ blankline -blockToMarkdown opts (BulletList items) = do - contents <- mapM (bulletListItemToMarkdown opts) items - return $ cat contents <> blankline -blockToMarkdown opts (OrderedList attribs items) = do - let markers = orderedListMarkers attribs - let markers' = map (\m -> if length m < 3 - then m ++ replicate (3 - length m) ' ' - else m) markers - contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ - zip markers' items - return $ cat contents <> blankline -blockToMarkdown opts (DefinitionList items) = do - contents <- mapM (definitionListItemToMarkdown opts) items - return $ cat contents <> blankline + return $ head'' $$ underline $$ body $$ bottom -- | Convert bullet list item (list of blocks) to markdown. bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc @@ -349,32 +450,38 @@ orderedListItemToMarkdown opts marker items = do -- | Convert definition list item (label, list of blocks) to markdown. definitionListItemToMarkdown :: WriterOptions - -> ([Inline],[[Block]]) + -> ([Inline],[[Block]]) -> State WriterState Doc definitionListItemToMarkdown opts (label, defs) = do labelText <- inlineListToMarkdown opts label - let tabStop = writerTabStop opts - st <- get - let leader = if stPlain st then " " else ": " - let sps = case writerTabStop opts - 3 of - n | n > 0 -> text $ replicate n ' ' - _ -> text " " defs' <- mapM (mapM (blockToMarkdown opts)) defs - let contents = vcat $ map (\d -> hang tabStop (leader <> sps) $ vcat d <> cr) defs' - return $ nowrap labelText <> cr <> contents <> cr + if isEnabled Ext_definition_lists opts + then do + let tabStop = writerTabStop opts + st <- get + let leader = if stPlain st then " " else ": " + let sps = case writerTabStop opts - 3 of + n | n > 0 -> text $ replicate n ' ' + _ -> text " " + let contents = vcat $ map (\d -> hang tabStop (leader <> sps) $ vcat d <> cr) defs' + return $ nowrap labelText <> cr <> contents <> cr + else do + return $ nowrap labelText <> text " " <> cr <> + vsep (map vsep defs') <> blankline -- | Convert list of Pandoc block elements to markdown. blockListToMarkdown :: WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> State WriterState Doc + -> State WriterState Doc blockListToMarkdown opts blocks = mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . cat -- insert comment between list and indented code block, or the -- code block will be treated as a list continuation paragraph where fixBlocks (b : CodeBlock attr x : rest) - | (writerStrictMarkdown opts || attr == nullAttr) && isListBlock b = + | (not (isEnabled Ext_fenced_code_blocks opts) || attr == nullAttr) + && isListBlock b = b : RawBlock "html" "<!-- -->\n" : CodeBlock attr x : - fixBlocks rest + fixBlocks rest fixBlocks (x : xs) = x : fixBlocks xs fixBlocks [] = [] isListBlock (BulletList _) = True @@ -412,7 +519,7 @@ escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc -inlineToMarkdown opts (Emph lst) = do +inlineToMarkdown opts (Emph lst) = do contents <- inlineListToMarkdown opts lst return $ "*" <> contents <> "*" inlineToMarkdown opts (Strong lst) = do @@ -420,15 +527,21 @@ inlineToMarkdown opts (Strong lst) = do return $ "**" <> contents <> "**" inlineToMarkdown opts (Strikeout lst) = do contents <- inlineListToMarkdown opts lst - return $ "~~" <> contents <> "~~" + return $ if isEnabled Ext_strikeout opts + then "~~" <> contents <> "~~" + else "<s>" <> contents <> "</s>" inlineToMarkdown opts (Superscript lst) = do let lst' = bottomUp escapeSpaces lst contents <- inlineListToMarkdown opts lst' - return $ "^" <> contents <> "^" + return $ if isEnabled Ext_superscript opts + then "^" <> contents <> "^" + else "<sup>" <> contents <> "</sup>" inlineToMarkdown opts (Subscript lst) = do let lst' = bottomUp escapeSpaces lst contents <- inlineListToMarkdown opts lst' - return $ "~" <> contents <> "~" + return $ if isEnabled Ext_subscript opts + then "~" <> contents <> "~" + else "<sub>" <> contents <> "</sub>" inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst @@ -437,33 +550,46 @@ inlineToMarkdown opts (Quoted DoubleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "“" <> contents <> "”" inlineToMarkdown opts (Code attr str) = - let tickGroups = filter (\s -> '`' `elem` s) $ group str + let tickGroups = filter (\s -> '`' `elem` s) $ group str longest = if null tickGroups then 0 - else maximum $ map length tickGroups - marker = replicate (longest + 1) '`' + else maximum $ map length tickGroups + marker = replicate (longest + 1) '`' spacer = if (longest == 0) then "" else " " - attrs = if writerStrictMarkdown opts || attr == nullAttr - then empty - else attrsToMarkdown attr + attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr + then attrsToMarkdown attr + else empty in return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs inlineToMarkdown _ (Str str) = do st <- get if stPlain st then return $ text str else return $ text $ escapeString str -inlineToMarkdown _ (Math InlineMath str) = - return $ "$" <> text str <> "$" -inlineToMarkdown _ (Math DisplayMath str) = - return $ "$$" <> text str <> "$$" -inlineToMarkdown _ (RawInline f str) - | f == "html" || f == "latex" || f == "tex" || f == "markdown" = +inlineToMarkdown opts (Math InlineMath str) + | isEnabled Ext_tex_math_dollars opts = + return $ "$" <> text str <> "$" + | isEnabled Ext_tex_math_single_backslash opts = + return $ "\\(" <> text str <> "\\)" + | isEnabled Ext_tex_math_double_backslash opts = + return $ "\\\\(" <> text str <> "\\\\)" + | otherwise = inlineListToMarkdown opts $ readTeXMath str +inlineToMarkdown opts (Math DisplayMath str) + | isEnabled Ext_tex_math_dollars opts = + return $ "$$" <> text str <> "$$" + | isEnabled Ext_tex_math_single_backslash opts = + return $ "\\[" <> text str <> "\\]" + | isEnabled Ext_tex_math_double_backslash opts = + return $ "\\\\[" <> text str <> "\\\\]" + | otherwise = (\x -> cr <> x <> cr) `fmap` + inlineListToMarkdown opts (readTeXMath str) +inlineToMarkdown opts (RawInline f str) + | f == "html" || f == "markdown" || + (isEnabled Ext_raw_tex opts && (f == "latex" || f == "tex")) = return $ text str inlineToMarkdown _ (RawInline _ _) = return empty -inlineToMarkdown opts (LineBreak) = return $ - if writerStrictMarkdown opts - then " " <> cr - else "\\" <> cr +inlineToMarkdown opts (LineBreak) + | isEnabled Ext_escaped_line_breaks opts = return $ "\\" <> cr + | otherwise = return $ " " <> cr inlineToMarkdown _ Space = return space inlineToMarkdown opts (Cite (c:cs) lst) | writerCiteMethod opts == Citeproc = inlineListToMarkdown opts lst @@ -513,7 +639,7 @@ inlineToMarkdown opts (Link txt (src, tit)) = do then "[]" else "[" <> reftext <> "]" in first <> second - else "[" <> linktext <> "](" <> + else "[" <> linktext <> "](" <> text src <> linktitle <> ")" inlineToMarkdown opts (Image alternate (source, tit)) = do let txt = if null alternate || alternate == [Str source] @@ -522,8 +648,10 @@ inlineToMarkdown opts (Image alternate (source, tit)) = do else alternate linkPart <- inlineToMarkdown opts (Link txt (source, tit)) return $ "!" <> linkPart -inlineToMarkdown _ (Note contents) = do +inlineToMarkdown opts (Note contents) = do modify (\st -> st{ stNotes = contents : stNotes st }) st <- get let ref = show $ (length $ stNotes st) - return $ "[^" <> text ref <> "]" + if isEnabled Ext_footnotes opts + then return $ "[^" <> text ref <> "]" + else return $ "[" <> text ref <> "]" diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index b32c5327d..84d7393c1 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -17,9 +17,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Writers.MediaWiki + Module : Text.Pandoc.Writers.MediaWiki Copyright : Copyright (C) 2008-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -31,7 +31,8 @@ MediaWiki: <http://www.mediawiki.org/wiki/MediaWiki> -} module Text.Pandoc.Writers.MediaWiki ( writeMediaWiki ) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Options +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.XML ( escapeStringForXML ) import Data.List ( intersect, intercalate ) @@ -46,9 +47,9 @@ data WriterState = WriterState { -- | Convert Pandoc to MediaWiki. writeMediaWiki :: WriterOptions -> Pandoc -> String -writeMediaWiki opts document = - evalState (pandocToMediaWiki opts document) - (WriterState { stNotes = False, stListLevel = [], stUseTags = False }) +writeMediaWiki opts document = + evalState (pandocToMediaWiki opts document) + (WriterState { stNotes = False, stListLevel = [], stUseTags = False }) -- | Return MediaWiki representation of document. pandocToMediaWiki :: WriterOptions -> Pandoc -> State WriterState String @@ -57,7 +58,7 @@ pandocToMediaWiki opts (Pandoc _ blocks) = do notesExist <- get >>= return . stNotes let notes = if notesExist then "\n<references />" - else "" + else "" let main = body ++ notes let context = writerVariables opts ++ [ ("body", main) ] ++ @@ -70,22 +71,23 @@ pandocToMediaWiki opts (Pandoc _ blocks) = do escapeString :: String -> String escapeString = escapeStringForXML --- | Convert Pandoc block element to MediaWiki. +-- | Convert Pandoc block element to MediaWiki. blockToMediaWiki :: WriterOptions -- ^ Options -> Block -- ^ Block element - -> State WriterState String + -> State WriterState String blockToMediaWiki _ Null = return "" -blockToMediaWiki opts (Plain inlines) = +blockToMediaWiki opts (Plain inlines) = inlineListToMediaWiki opts inlines blockToMediaWiki opts (Para [Image txt (src,tit)]) = do - capt <- inlineListToMediaWiki opts txt + capt <- if null txt + then return "" + else ("|caption " ++) `fmap` inlineListToMediaWiki opts txt let opt = if null txt then "" - else "|alt=" ++ if null tit then capt else tit ++ - "|caption " ++ capt + else "|alt=" ++ if null tit then capt else tit ++ capt return $ "[[Image:" ++ src ++ "|frame|none" ++ opt ++ "]]\n" blockToMediaWiki opts (Para inlines) = do @@ -115,7 +117,7 @@ blockToMediaWiki _ (CodeBlock (_,classes,_) str) = do "javascript", "latex", "lisp", "lua", "matlab", "mirc", "mpasm", "mysql", "nsis", "objc", "ocaml", "ocaml-brief", "oobas", "oracle8", "pascal", "perl", "php", "php-brief", "plsql", "python", "qbasic", "rails", "reg", "robots", "ruby", "sas", "scheme", "sdlbasic", - "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", + "smalltalk", "smarty", "sql", "tcl", "", "thinbasic", "tsql", "vb", "vbnet", "vhdl", "visualfoxpro", "winbatch", "xml", "xpp", "z80"] let (beg, end) = if null at then ("<pre" ++ if null classes then ">" else " class=\"" ++ unwords classes ++ "\">", "</pre>") @@ -124,7 +126,7 @@ blockToMediaWiki _ (CodeBlock (_,classes,_) str) = do blockToMediaWiki opts (BlockQuote blocks) = do contents <- blockListToMediaWiki opts blocks - return $ "<blockquote>" ++ contents ++ "</blockquote>" + return $ "<blockquote>" ++ contents ++ "</blockquote>" blockToMediaWiki opts (Table capt aligns widths headers rows') = do let alignStrings = map alignmentToString aligns @@ -221,7 +223,7 @@ listItemToMediaWiki opts items = do -- | Convert definition list item (label, list of blocks) to MediaWiki. definitionListItemToMediaWiki :: WriterOptions - -> ([Inline],[[Block]]) + -> ([Inline],[[Block]]) -> State WriterState String definitionListItemToMediaWiki opts (label, items) = do labelText <- inlineListToMediaWiki opts label @@ -242,7 +244,7 @@ isSimpleList x = BulletList items -> all isSimpleListItem items OrderedList (num, sty, _) items -> all isSimpleListItem items && num == 1 && sty `elem` [DefaultStyle, Decimal] - DefinitionList items -> all isSimpleListItem $ concatMap snd items + DefinitionList items -> all isSimpleListItem $ concatMap snd items _ -> False -- | True if list item can be handled with the simple wiki syntax. False if @@ -287,8 +289,8 @@ tableRowToMediaWiki opts alignStrings rownum cols' = do 0 -> "header" x | x `rem` 2 == 1 -> "odd" _ -> "even" - cols'' <- sequence $ zipWith - (\alignment item -> tableItemToMediaWiki opts celltype alignment item) + cols'' <- sequence $ zipWith + (\alignment item -> tableItemToMediaWiki opts celltype alignment item) alignStrings cols' return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>" @@ -313,7 +315,7 @@ tableItemToMediaWiki opts celltype align' item = do -- | Convert list of Pandoc block elements to MediaWiki. blockListToMediaWiki :: WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> State WriterState String + -> State WriterState String blockListToMediaWiki opts blocks = mapM (blockToMediaWiki opts) blocks >>= return . vcat @@ -325,9 +327,9 @@ inlineListToMediaWiki opts lst = -- | Convert Pandoc inline element to MediaWiki. inlineToMediaWiki :: WriterOptions -> Inline -> State WriterState String -inlineToMediaWiki opts (Emph lst) = do +inlineToMediaWiki opts (Emph lst) = do contents <- inlineListToMediaWiki opts lst - return $ "''" ++ contents ++ "''" + return $ "''" ++ contents ++ "''" inlineToMediaWiki opts (Strong lst) = do contents <- inlineListToMediaWiki opts lst @@ -365,8 +367,8 @@ inlineToMediaWiki _ (Str str) = return $ escapeString str inlineToMediaWiki _ (Math _ str) = return $ "<math>" ++ str ++ "</math>" -- note: str should NOT be escaped -inlineToMediaWiki _ (RawInline "mediawiki" str) = return str -inlineToMediaWiki _ (RawInline "html" str) = return str +inlineToMediaWiki _ (RawInline "mediawiki" str) = return str +inlineToMediaWiki _ (RawInline "html" str) = return str inlineToMediaWiki _ (RawInline _ _) = return "" inlineToMediaWiki _ (LineBreak) = return "<br />\n" @@ -392,7 +394,7 @@ inlineToMediaWiki opts (Image alt (source, tit)) = do else "|" ++ tit return $ "[[Image:" ++ source ++ txt ++ "]]" -inlineToMediaWiki opts (Note contents) = do +inlineToMediaWiki opts (Note contents) = do contents' <- blockListToMediaWiki opts contents modify (\s -> s { stNotes = True }) return $ "<ref>" ++ contents' ++ "</ref>" diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index d2b56cd17..7fb304e86 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Native Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -34,7 +34,7 @@ metadata. -} module Text.Pandoc.Writers.Native ( writeNative ) where -import Text.Pandoc.Shared ( WriterOptions(..) ) +import Text.Pandoc.Options ( WriterOptions(..) ) import Data.List ( intersperse ) import Text.Pandoc.Definition import Text.Pandoc.Pretty @@ -47,17 +47,17 @@ prettyList ds = prettyBlock :: Block -> Doc prettyBlock (BlockQuote blocks) = "BlockQuote" $$ prettyList (map prettyBlock blocks) -prettyBlock (OrderedList attribs blockLists) = +prettyBlock (OrderedList attribs blockLists) = "OrderedList" <> space <> text (show attribs) $$ (prettyList $ map (prettyList . map prettyBlock) blockLists) -prettyBlock (BulletList blockLists) = +prettyBlock (BulletList blockLists) = "BulletList" $$ (prettyList $ map (prettyList . map prettyBlock) blockLists) prettyBlock (DefinitionList items) = "DefinitionList" $$ (prettyList $ map deflistitem items) where deflistitem (term, defs) = "(" <> text (show term) <> "," <> cr <> nest 1 (prettyList $ map (prettyList . map prettyBlock) defs) <> ")" -prettyBlock (Table caption aligns widths header rows) = +prettyBlock (Table caption aligns widths header rows) = "Table " <> text (show caption) <> " " <> text (show aligns) <> " " <> text (show widths) $$ prettyRow header $$ diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 9e3dba98a..f43d0a087 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -36,7 +36,8 @@ import Data.ByteString.Lazy.UTF8 ( fromString ) import Codec.Archive.Zip import Data.Time.Clock.POSIX import Paths_pandoc ( getDataFileName ) -import Text.Pandoc.Shared hiding (Element) +import Text.Pandoc.Options ( WriterOptions(..) ) +import Text.Pandoc.Shared ( stringify ) import Text.Pandoc.ImageSize ( readImageSize, sizeInPoints ) import Text.Pandoc.MIME ( getMimeType ) import Text.Pandoc.Definition @@ -47,16 +48,16 @@ import Control.Monad (liftM) import Network.URI ( unEscapeString ) import Text.Pandoc.XML import Text.Pandoc.Pretty +import qualified Control.Exception as E -- | Produce an ODT file from a Pandoc document. -writeODT :: Maybe FilePath -- ^ Path specified by --reference-odt - -> WriterOptions -- ^ Writer options +writeODT :: WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO B.ByteString -writeODT mbRefOdt opts doc@(Pandoc (Meta title _ _) _) = do +writeODT opts doc@(Pandoc (Meta title _ _) _) = do let datadir = writerUserDataDir opts refArchive <- liftM toArchive $ - case mbRefOdt of + case writerReferenceODT opts of Just f -> B.readFile f Nothing -> do let defaultODT = getDataFileName "reference.odt" >>= B.readFile @@ -128,9 +129,9 @@ transformPic sourceDir entriesRef (Image lab (src,tit)) = do Nothing -> tit entries <- readIORef entriesRef let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src' - catch (readEntry [] (sourceDir </> src') >>= \entry -> - modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >> - return (Image lab (newsrc, tit'))) - (\_ -> return (Emph lab)) + E.catch (readEntry [] (sourceDir </> src') >>= \entry -> + modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >> + return (Image lab (newsrc, tit'))) + (\e -> let _ = (e :: E.SomeException) in return (Emph lab)) transformPic _ _ x = return x diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index a0317511a..027ddfda1 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -31,7 +31,7 @@ Conversion of 'Pandoc' documents to OpenDocument XML. -} module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.XML import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Readers.TeXMath diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 7eb943a22..b885a7a40 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -20,7 +20,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Org Copyright : Copyright (C) 2010 Puneeth Chaganti - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : Puneeth Chaganti <punchagan@gmail.com> Stability : alpha @@ -32,14 +32,15 @@ Org-Mode: <http://orgmode.org> -} module Text.Pandoc.Writers.Org ( writeOrg) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Options +import Text.Pandoc.Shared import Text.Pandoc.Pretty import Text.Pandoc.Templates (renderTemplate) import Data.List ( intersect, intersperse, transpose ) import Control.Monad.State import Control.Applicative ( (<$>) ) -data WriterState = +data WriterState = WriterState { stNotes :: [[Block]] , stLinks :: Bool , stImages :: Bool @@ -49,7 +50,7 @@ data WriterState = -- | Convert Pandoc to Org. writeOrg :: WriterOptions -> Pandoc -> String -writeOrg opts document = +writeOrg opts document = let st = WriterState { stNotes = [], stLinks = False, stImages = False, stHasMath = False, stOptions = opts } @@ -82,8 +83,8 @@ pandocToOrg (Pandoc (Meta tit auth dat) blocks) = do -- | Return Org representation of notes. notesToOrg :: [[Block]] -> State WriterState Doc -notesToOrg notes = - mapM (\(num, note) -> noteToOrg num note) (zip [1..] notes) >>= +notesToOrg notes = + mapM (\(num, note) -> noteToOrg num note) (zip [1..] notes) >>= return . vsep -- | Return Org representation of a note. @@ -106,21 +107,24 @@ titleToOrg :: [Inline] -> State WriterState Doc titleToOrg [] = return empty titleToOrg lst = do contents <- inlineListToOrg lst - return $ "#+TITLE: " <> contents + return $ "#+TITLE: " <> contents --- | Convert Pandoc block element to Org. +-- | Convert Pandoc block element to Org. blockToOrg :: Block -- ^ Block element - -> State WriterState Doc + -> State WriterState Doc blockToOrg Null = return empty blockToOrg (Plain inlines) = inlineListToOrg inlines blockToOrg (Para [Image txt (src,tit)]) = do - capt <- inlineListToOrg txt + capt <- if null txt + then return empty + else (\c -> "#+CAPTION: " <> c <> blankline) `fmap` + inlineListToOrg txt img <- inlineToOrg (Image txt (src,tit)) - return $ "#+CAPTION: " <> capt <> blankline <> img + return $ capt <> img blockToOrg (Para inlines) = do contents <- inlineListToOrg inlines return $ contents <> blankline -blockToOrg (RawBlock "html" str) = +blockToOrg (RawBlock "html" str) = return $ blankline $$ "#+BEGIN_HTML" $$ nest 2 (text str) $$ "#+END_HTML" $$ blankline blockToOrg (RawBlock f str) | f == "org" || f == "latex" || f == "tex" = @@ -134,17 +138,17 @@ blockToOrg (Header level inlines) = do blockToOrg (CodeBlock (_,classes,_) str) = do opts <- stOptions <$> get let tabstop = writerTabStop opts - let at = classes `intersect` ["asymptote", "C", "clojure", "css", "ditaa", - "dot", "emacs-lisp", "gnuplot", "haskell", "js", "latex", - "ledger", "lisp", "matlab", "mscgen", "ocaml", "octave", - "oz", "perl", "plantuml", "python", "R", "ruby", "sass", + let at = classes `intersect` ["asymptote", "C", "clojure", "css", "ditaa", + "dot", "emacs-lisp", "gnuplot", "haskell", "js", "latex", + "ledger", "lisp", "matlab", "mscgen", "ocaml", "octave", + "oz", "perl", "plantuml", "python", "R", "ruby", "sass", "scheme", "screen", "sh", "sql", "sqlite"] let (beg, end) = case at of [] -> ("#+BEGIN_EXAMPLE", "#+END_EXAMPLE") (x:_) -> ("#+BEGIN_SRC " ++ x, "#+END_SRC") return $ text beg $$ nest tabstop (text str) $$ text end $$ blankline blockToOrg (BlockQuote blocks) = do - contents <- blockListToOrg blocks + contents <- blockListToOrg blocks return $ blankline $$ "#+BEGIN_QUOTE" $$ nest 2 contents $$ "#+END_QUOTE" $$ blankline blockToOrg (Table caption' _ _ headers rows) = do @@ -155,11 +159,11 @@ blockToOrg (Table caption' _ _ headers rows) = do headers' <- mapM blockListToOrg headers rawRows <- mapM (mapM blockListToOrg) rows let numChars = maximum . map offset - -- FIXME: width is not being used. + -- FIXME: width is not being used. let widthsInChars = map ((+2) . numChars) $ transpose (headers' : rawRows) - -- FIXME: Org doesn't allow blocks with height more than 1. - let hpipeBlocks blocks = hcat [beg, middle, end] + -- FIXME: Org doesn't allow blocks with height more than 1. + let hpipeBlocks blocks = hcat [beg, middle, end] where h = maximum (map height blocks) sep' = lblock 3 $ vcat (map text $ replicate h " | ") beg = lblock 2 $ vcat (map text $ replicate h "| ") @@ -170,7 +174,7 @@ blockToOrg (Table caption' _ _ headers rows) = do rows' <- mapM (\row -> do cols <- mapM blockListToOrg row return $ makeRow cols) rows let border ch = char '|' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ + (hcat $ intersperse (char ch <> char '+' <> char ch) $ map (\l -> text $ replicate l ch) widthsInChars) <> char ch <> char '|' let body = vcat rows' @@ -186,7 +190,7 @@ blockToOrg (OrderedList (start, _, delim) items) = do let delim' = case delim of TwoParens -> OneParen x -> x - let markers = take (length items) $ orderedListMarkers + let markers = take (length items) $ orderedListMarkers (start, Decimal, delim') let maxMarkerLength = maximum $ map length markers let markers' = map (\m -> let s = maxMarkerLength - length m @@ -222,7 +226,7 @@ definitionListItemToOrg (label, defs) = do -- | Convert list of Pandoc block elements to Org. blockListToOrg :: [Block] -- ^ List of block elements - -> State WriterState Doc + -> State WriterState Doc blockListToOrg blocks = mapM blockToOrg blocks >>= return . vcat -- | Convert list of Pandoc inline elements to Org. @@ -231,19 +235,19 @@ inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat -- | Convert Pandoc inline element to Org. inlineToOrg :: Inline -> State WriterState Doc -inlineToOrg (Emph lst) = do +inlineToOrg (Emph lst) = do contents <- inlineListToOrg lst return $ "/" <> contents <> "/" inlineToOrg (Strong lst) = do contents <- inlineListToOrg lst return $ "*" <> contents <> "*" -inlineToOrg (Strikeout lst) = do +inlineToOrg (Strikeout lst) = do contents <- inlineListToOrg lst return $ "+" <> contents <> "+" -inlineToOrg (Superscript lst) = do +inlineToOrg (Superscript lst) = do contents <- inlineListToOrg lst return $ "^{" <> contents <> "}" -inlineToOrg (Subscript lst) = do +inlineToOrg (Subscript lst) = do contents <- inlineListToOrg lst return $ "_{" <> contents <> "}" inlineToOrg (SmallCaps lst) = inlineListToOrg lst @@ -276,7 +280,7 @@ inlineToOrg (Link txt (src, _)) = do inlineToOrg (Image _ (source, _)) = do modify $ \s -> s{ stImages = True } return $ "[[" <> text source <> "]]" -inlineToOrg (Note contents) = do +inlineToOrg (Note contents) = do -- add to notes in state notes <- get >>= (return . stNotes) modify $ \st -> st { stNotes = contents:notes } diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index d98079940..5b0b5a414 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -18,9 +18,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.Writers.RST + Module : Text.Pandoc.Writers.RST Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -32,7 +32,8 @@ reStructuredText: <http://docutils.sourceforge.net/rst.html> -} module Text.Pandoc.Writers.RST ( writeRST) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Options +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Data.List ( isPrefixOf, intersperse, transpose ) import Text.Pandoc.Pretty @@ -42,7 +43,7 @@ import Data.Char (isSpace) type Refs = [([Inline], Target)] -data WriterState = +data WriterState = WriterState { stNotes :: [[Block]] , stLinks :: Refs , stImages :: Refs @@ -52,7 +53,7 @@ data WriterState = -- | Convert Pandoc to RST. writeRST :: WriterOptions -> Pandoc -> String -writeRST opts document = +writeRST opts document = let st = WriterState { stNotes = [], stLinks = [], stImages = [], stHasMath = False, stOptions = opts } @@ -89,8 +90,8 @@ pandocToRST (Pandoc (Meta tit auth dat) blocks) = do refsToRST :: Refs -> State WriterState Doc refsToRST refs = mapM keyToRST refs >>= return . vcat --- | Return RST representation of a reference key. -keyToRST :: ([Inline], (String, String)) +-- | Return RST representation of a reference key. +keyToRST :: ([Inline], (String, String)) -> State WriterState Doc keyToRST (label, (src, _)) = do label' <- inlineListToRST label @@ -101,7 +102,7 @@ keyToRST (label, (src, _)) = do -- | Return RST representation of notes. notesToRST :: [[Block]] -> State WriterState Doc -notesToRST notes = +notesToRST notes = mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>= return . vsep @@ -116,8 +117,8 @@ noteToRST num note = do pictRefsToRST :: Refs -> State WriterState Doc pictRefsToRST refs = mapM pictToRST refs >>= return . vcat --- | Return RST representation of a picture substitution reference. -pictToRST :: ([Inline], (String, String)) +-- | Return RST representation of a picture substitution reference. +pictToRST :: ([Inline], (String, String)) -> State WriterState Doc pictToRST (label, (src, _)) = do label' <- inlineListToRST label @@ -135,9 +136,9 @@ titleToRST lst = do let border = text (replicate titleLength '=') return $ border $$ contents $$ border --- | Convert Pandoc block element to RST. +-- | Convert Pandoc block element to RST. blockToRST :: Block -- ^ Block element - -> State WriterState Doc + -> State WriterState Doc blockToRST Null = return empty blockToRST (Plain inlines) = inlineListToRST inlines blockToRST (Para [Image txt (src,tit)]) = do @@ -163,12 +164,12 @@ blockToRST (CodeBlock (_,classes,_) str) = do opts <- stOptions <$> get let tabstop = writerTabStop opts if "haskell" `elem` classes && "literate" `elem` classes && - writerLiterateHaskell opts + isEnabled Ext_literate_haskell opts then return $ prefixed "> " (text str) $$ blankline else return $ "::" $+$ nest tabstop (text str) $$ blankline blockToRST (BlockQuote blocks) = do tabstop <- get >>= (return . writerTabStop . stOptions) - contents <- blockListToRST blocks + contents <- blockListToRST blocks return $ (nest tabstop contents) <> blankline blockToRST (Table caption _ widths headers rows) = do caption' <- inlineListToRST caption @@ -184,7 +185,7 @@ blockToRST (Table caption _ widths headers rows) = do if isSimple then map ((+2) . numChars) $ transpose (headers' : rawRows) else map (floor . (fromIntegral (writerColumns opts) *)) widths - let hpipeBlocks blocks = hcat [beg, middle, end] + let hpipeBlocks blocks = hcat [beg, middle, end] where h = maximum (map height blocks) sep' = lblock 3 $ vcat (map text $ replicate h " | ") beg = lblock 2 $ vcat (map text $ replicate h "| ") @@ -195,7 +196,7 @@ blockToRST (Table caption _ widths headers rows) = do rows' <- mapM (\row -> do cols <- mapM blockListToRST row return $ makeRow cols) rows let border ch = char '+' <> char ch <> - (hcat $ intersperse (char ch <> char '+' <> char ch) $ + (hcat $ intersperse (char ch <> char '+' <> char ch) $ map (\l -> text $ replicate l ch) widthsInChars) <> char ch <> char '+' let body = vcat $ intersperse (border '-') rows' @@ -208,9 +209,9 @@ blockToRST (BulletList items) = do -- ensure that sublists have preceding blank line return $ blankline $$ vcat contents $$ blankline blockToRST (OrderedList (start, style', delim) items) = do - let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim + let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim then take (length items) $ repeat "#." - else take (length items) $ orderedListMarkers + else take (length items) $ orderedListMarkers (start, style', delim) let maxMarkerLength = maximum $ map length markers let markers' = map (\m -> let s = maxMarkerLength - length m @@ -249,7 +250,7 @@ definitionListItemToRST (label, defs) = do -- | Convert list of Pandoc block elements to RST. blockListToRST :: [Block] -- ^ List of block elements - -> State WriterState Doc + -> State WriterState Doc blockListToRST blocks = mapM blockToRST blocks >>= return . vcat -- | Convert list of Pandoc inline elements to RST. @@ -303,19 +304,19 @@ inlineListToRST lst = mapM inlineToRST (insertBS lst) >>= return . hcat -- | Convert Pandoc inline element to RST. inlineToRST :: Inline -> State WriterState Doc -inlineToRST (Emph lst) = do +inlineToRST (Emph lst) = do contents <- inlineListToRST lst return $ "*" <> contents <> "*" inlineToRST (Strong lst) = do contents <- inlineListToRST lst return $ "**" <> contents <> "**" -inlineToRST (Strikeout lst) = do +inlineToRST (Strikeout lst) = do contents <- inlineListToRST lst return $ "[STRIKEOUT:" <> contents <> "]" -inlineToRST (Superscript lst) = do +inlineToRST (Superscript lst) = do contents <- inlineListToRST lst return $ ":sup:`" <> contents <> "`" -inlineToRST (Subscript lst) = do +inlineToRST (Subscript lst) = do contents <- inlineListToRST lst return $ ":sub:`" <> contents <> "`" inlineToRST (SmallCaps lst) = inlineListToRST lst @@ -358,7 +359,7 @@ inlineToRST (Link txt (src, tit)) = do else return $ "`" <> linktext <> " <" <> text src <> ">`_" inlineToRST (Image alternate (source, tit)) = do pics <- get >>= return . stImages - let labelsUsed = map fst pics + let labelsUsed = map fst pics let txt = if null alternate || alternate == [Str ""] || alternate `elem` labelsUsed then [Str $ "image" ++ show (length pics)] @@ -369,7 +370,7 @@ inlineToRST (Image alternate (source, tit)) = do modify $ \st -> st { stImages = pics' } label <- inlineListToRST txt return $ "|" <> label <> "|" -inlineToRST (Note contents) = do +inlineToRST (Note contents) = do -- add to notes in state notes <- get >>= return . stNotes modify $ \st -> st { stNotes = contents:notes } diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index 4e7c2a7cd..1919eb3f2 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -19,16 +19,17 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.RTF Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha + Stability : alpha Portability : portable Conversion of 'Pandoc' documents to RTF (rich text format). -} module Text.Pandoc.Writers.RTF ( writeRTF, rtfEmbedImage ) where import Text.Pandoc.Definition +import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Templates (renderTemplate) @@ -38,6 +39,7 @@ import System.FilePath ( takeExtension ) import qualified Data.ByteString as B import Text.Printf ( printf ) import Network.URI ( isAbsoluteURI, unEscapeString ) +import qualified Control.Exception as E -- | Convert Image inlines into a raw RTF embedded image, read from a file. -- If file not found or filetype not jpeg or png, leave the inline unchanged. @@ -47,7 +49,8 @@ rtfEmbedImage x@(Image _ (src,_)) = do if ext `elem` [".jpg",".jpeg",".png"] && not (isAbsoluteURI src) then do let src' = unEscapeString src - imgdata <- catch (B.readFile src') (\_ -> return B.empty) + imgdata <- E.catch (B.readFile src') + (\e -> let _ = (e :: E.SomeException) in return B.empty) let bytes = map (printf "%02x") $ B.unpack imgdata let filetype = case ext of ".jpg" -> "\\jpegblip" @@ -63,7 +66,7 @@ rtfEmbedImage x = return x -- | Convert Pandoc to a string in rich text format. writeRTF :: WriterOptions -> Pandoc -> String -writeRTF options (Pandoc (Meta title authors date) blocks) = +writeRTF options (Pandoc (Meta title authors date) blocks) = let titletext = inlineListToRTF title authorstext = map inlineListToRTF authors datetext = inlineListToRTF date @@ -82,11 +85,11 @@ writeRTF options (Pandoc (Meta title authors date) blocks) = else body -- | Construct table of contents from list of header blocks. -tableOfContents :: [Block] -> String +tableOfContents :: [Block] -> String tableOfContents headers = let contentsTree = hierarchicalize headers - in concatMap (blockToRTF 0 AlignDefault) $ - [Header 1 [Str "Contents"], + in concatMap (blockToRTF 0 AlignDefault) $ + [Header 1 [Str "Contents"], BulletList (map elementToListItem contentsTree)] elementToListItem :: Element -> [Block] @@ -100,7 +103,7 @@ elementToListItem (Sec _ _ _ sectext subsecs) = [Plain sectext] ++ handleUnicode :: String -> String handleUnicode [] = [] handleUnicode (c:cs) = - if ord c > 127 + if ord c > 127 then '\\':'u':(show (ord c)) ++ "?" ++ handleUnicode cs else c:(handleUnicode cs) @@ -130,32 +133,32 @@ rtfParSpaced :: Int -- ^ space after (in twips) -> Int -- ^ first line indent (relative to block) (in twips) -> Alignment -- ^ alignment -> String -- ^ string with content - -> String -rtfParSpaced spaceAfter indent firstLineIndent alignment content = + -> String +rtfParSpaced spaceAfter indent firstLineIndent alignment content = let alignString = case alignment of AlignLeft -> "\\ql " AlignRight -> "\\qr " AlignCenter -> "\\qc " AlignDefault -> "\\ql " in "{\\pard " ++ alignString ++ - "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++ + "\\f0 \\sa" ++ (show spaceAfter) ++ " \\li" ++ (show indent) ++ " \\fi" ++ (show firstLineIndent) ++ " " ++ content ++ "\\par}\n" --- | Default paragraph. +-- | Default paragraph. rtfPar :: Int -- ^ block indent (in twips) -> Int -- ^ first line indent (relative to block) (in twips) -> Alignment -- ^ alignment -> String -- ^ string with content - -> String -rtfPar = rtfParSpaced 180 + -> String +rtfPar = rtfParSpaced 180 -- | Compact paragraph (e.g. for compact list items). rtfCompact :: Int -- ^ block indent (in twips) -> Int -- ^ first line indent (relative to block) (in twips) -> Alignment -- ^ alignment -> String -- ^ string with content - -> String -rtfCompact = rtfParSpaced 0 + -> String +rtfCompact = rtfParSpaced 0 -- number of twips to indent indentIncrement :: Int @@ -172,7 +175,7 @@ bulletMarker indent = case indent `mod` 720 of -- | Returns appropriate (list of) ordered list markers for indent level. orderedMarkers :: Int -> ListAttributes -> [String] -orderedMarkers indent (start, style, delim) = +orderedMarkers indent (start, style, delim) = if style == DefaultStyle && delim == DefaultDelim then case indent `mod` 720 of 0 -> orderedListMarkers (start, Decimal, Period) @@ -185,30 +188,30 @@ blockToRTF :: Int -- ^ indent level -> Block -- ^ block to convert -> String blockToRTF _ _ Null = "" -blockToRTF indent alignment (Plain lst) = +blockToRTF indent alignment (Plain lst) = rtfCompact indent 0 alignment $ inlineListToRTF lst -blockToRTF indent alignment (Para lst) = +blockToRTF indent alignment (Para lst) = rtfPar indent 0 alignment $ inlineListToRTF lst -blockToRTF indent alignment (BlockQuote lst) = - concatMap (blockToRTF (indent + indentIncrement) alignment) lst +blockToRTF indent alignment (BlockQuote lst) = + concatMap (blockToRTF (indent + indentIncrement) alignment) lst blockToRTF indent _ (CodeBlock _ str) = rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) blockToRTF _ _ (RawBlock "rtf" str) = str blockToRTF _ _ (RawBlock _ _) = "" -blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ +blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst -blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ +blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ zipWith (listItemToRTF alignment indent) (orderedMarkers indent attribs) lst -blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $ +blockToRTF indent alignment (DefinitionList lst) = spaceAtEnd $ concatMap (definitionListItemToRTF alignment indent) lst -blockToRTF indent _ HorizontalRule = +blockToRTF indent _ HorizontalRule = rtfPar indent 0 AlignCenter "\\emdash\\emdash\\emdash\\emdash\\emdash" blockToRTF indent alignment (Header level lst) = rtfPar indent 0 alignment $ "\\b \\fs" ++ (show (40 - (level * 4))) ++ " " ++ inlineListToRTF lst -blockToRTF indent alignment (Table caption aligns sizes headers rows) = +blockToRTF indent alignment (Table caption aligns sizes headers rows) = (if all null headers then "" - else tableRowToRTF True indent aligns sizes headers) ++ + else tableRowToRTF True indent aligns sizes headers) ++ concatMap (tableRowToRTF False indent aligns sizes) rows ++ rtfPar indent 0 alignment (inlineListToRTF caption) @@ -230,7 +233,7 @@ tableRowToRTF header indent aligns sizes' cols = end = "}\n\\intbl\\row}\n" in start ++ columns ++ end -tableItemToRTF :: Int -> Alignment -> [Block] -> String +tableItemToRTF :: Int -> Alignment -> [Block] -> String tableItemToRTF indent alignment item = let contents = concatMap (blockToRTF indent alignment) item in "{\\intbl " ++ contents ++ "\\cell}\n" @@ -238,7 +241,7 @@ tableItemToRTF indent alignment item = -- | Ensure that there's the same amount of space after compact -- lists as after regular lists. spaceAtEnd :: String -> String -spaceAtEnd str = +spaceAtEnd str = if isSuffixOf "\\par}\n" str then (take ((length str) - 6) str) ++ "\\sa180\\par}\n" else str @@ -249,10 +252,10 @@ listItemToRTF :: Alignment -- ^ alignment -> String -- ^ list start marker -> [Block] -- ^ list item (list of blocks) -> [Char] -listItemToRTF alignment indent marker [] = - rtfCompact (indent + listIncrement) (0 - listIncrement) alignment - (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") -listItemToRTF alignment indent marker list = +listItemToRTF alignment indent marker [] = + rtfCompact (indent + listIncrement) (0 - listIncrement) alignment + (marker ++ "\\tx" ++ (show listIncrement) ++ "\\tab ") +listItemToRTF alignment indent marker list = let (first:rest) = map (blockToRTF (indent + listIncrement) alignment) list listMarker = "\\fi" ++ show (0 - listIncrement) ++ " " ++ marker ++ "\\tx" ++ show listIncrement ++ "\\tab" @@ -275,7 +278,7 @@ definitionListItemToRTF alignment indent (label, defs) = let labelText = blockToRTF indent alignment (Plain label) itemsText = concatMap (blockToRTF (indent + listIncrement) alignment) $ concat defs - in labelText ++ itemsText + in labelText ++ itemsText -- | Convert list of inline items to RTF. inlineListToRTF :: [Inline] -- ^ list of inlines to convert @@ -291,9 +294,9 @@ inlineToRTF (Strikeout lst) = "{\\strike " ++ (inlineListToRTF lst) ++ "}" inlineToRTF (Superscript lst) = "{\\super " ++ (inlineListToRTF lst) ++ "}" inlineToRTF (Subscript lst) = "{\\sub " ++ (inlineListToRTF lst) ++ "}" inlineToRTF (SmallCaps lst) = "{\\scaps " ++ (inlineListToRTF lst) ++ "}" -inlineToRTF (Quoted SingleQuote lst) = +inlineToRTF (Quoted SingleQuote lst) = "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'" -inlineToRTF (Quoted DoubleQuote lst) = +inlineToRTF (Quoted DoubleQuote lst) = "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" inlineToRTF (Str str) = stringToRTF str @@ -303,11 +306,11 @@ inlineToRTF (RawInline "rtf" str) = str inlineToRTF (RawInline _ _) = "" inlineToRTF (LineBreak) = "\\line " inlineToRTF Space = " " -inlineToRTF (Link text (src, _)) = - "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ +inlineToRTF (Link text (src, _)) = + "{\\field{\\*\\fldinst{HYPERLINK \"" ++ (codeStringToRTF src) ++ "\"}}{\\fldrslt{\\ul\n" ++ (inlineListToRTF text) ++ "\n}}}\n" -inlineToRTF (Image _ (source, _)) = - "{\\cf1 [image: " ++ source ++ "]\\cf0}" +inlineToRTF (Image _ (source, _)) = + "{\\cf1 [image: " ++ source ++ "]\\cf0}" inlineToRTF (Note contents) = - "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ + "{\\super\\chftn}{\\*\\footnote\\chftn\\~\\plain\\pard " ++ (concatMap (blockToRTF 0 AlignDefault) contents) ++ "}" diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 6bb782899..40e76c615 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -19,16 +19,17 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Texinfo Copyright : Copyright (C) 2008-2010 John MacFarlane and Peter Wang - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha + Stability : alpha Portability : portable Conversion of 'Pandoc' format into Texinfo. -} module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where import Text.Pandoc.Definition +import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.Printf ( printf ) @@ -40,7 +41,7 @@ import Text.Pandoc.Pretty import Network.URI ( isAbsoluteURI, unEscapeString ) import System.FilePath -data WriterState = +data WriterState = WriterState { stStrikeout :: Bool -- document contains strikeout , stSuperscript :: Bool -- document contains superscript , stSubscript :: Bool -- document contains subscript @@ -53,8 +54,8 @@ data WriterState = -- | Convert Pandoc to Texinfo. writeTexinfo :: WriterOptions -> Pandoc -> String -writeTexinfo options document = - evalState (pandocToTexinfo options $ wrapTop document) $ +writeTexinfo options document = + evalState (pandocToTexinfo options $ wrapTop document) $ WriterState { stStrikeout = False, stSuperscript = False, stSubscript = False } -- | Add a "Top" node around the document, needed by Texinfo. @@ -116,10 +117,12 @@ blockToTexinfo (Plain lst) = inlineListToTexinfo lst blockToTexinfo (Para [Image txt (src,tit)]) = do - capt <- inlineListToTexinfo txt + capt <- if null txt + then return empty + else (\c -> text "@caption" <> braces c) `fmap` + inlineListToTexinfo txt img <- inlineToTexinfo (Image txt (src,tit)) - return $ text "@float" $$ img $$ (text "@caption{" <> capt <> char '}') $$ - text "@end float" + return $ text "@float" $$ img $$ capt $$ text "@end float" blockToTexinfo (Para lst) = inlineListToTexinfo lst -- this is handled differently from Plain in blockListToTexinfo @@ -217,7 +220,7 @@ blockToTexinfo (Table caption aligns widths heads rows) = do else return $ "@columnfractions " ++ concatMap (printf "%.2f ") widths let tableBody = text ("@multitable " ++ colDescriptors) $$ headers $$ - vcat rowsText $$ + vcat rowsText $$ text "@end multitable" return $ if isEmpty captionText then tableBody <> blankline @@ -241,7 +244,7 @@ tableAnyRowToTexinfo :: String -> [[Block]] -> State WriterState Doc tableAnyRowToTexinfo itemtype aligns cols = - zipWithM alignedBlock aligns cols >>= + zipWithM alignedBlock aligns cols >>= return . (text itemtype $$) . foldl (\row item -> row $$ (if isEmpty row then empty else text " @tab ") <> item) empty @@ -358,8 +361,8 @@ inlineToTexinfo :: Inline -- ^ Inline to convert inlineToTexinfo (Emph lst) = inlineListToTexinfo lst >>= return . inCmd "emph" -inlineToTexinfo (Strong lst) = - inlineListToTexinfo lst >>= return . inCmd "strong" +inlineToTexinfo (Strong lst) = + inlineListToTexinfo lst >>= return . inCmd "strong" inlineToTexinfo (Strikeout lst) = do modify $ \st -> st{ stStrikeout = True } diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 26d5ec6d7..5f3bb6bcd 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.Textile Copyright : Copyright (C) 2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -31,7 +31,8 @@ Textile: <http://thresholdstate.com/articles/4312/the-textile-reference-manual> -} module Text.Pandoc.Writers.Textile ( writeTextile ) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Options +import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.XML ( escapeStringForXML ) import Data.List ( intercalate ) @@ -46,9 +47,9 @@ data WriterState = WriterState { -- | Convert Pandoc to Textile. writeTextile :: WriterOptions -> Pandoc -> String -writeTextile opts document = - evalState (pandocToTextile opts document) - (WriterState { stNotes = [], stListLevel = [], stUseTags = False }) +writeTextile opts document = + evalState (pandocToTextile opts document) + (WriterState { stNotes = [], stListLevel = [], stUseTags = False }) -- | Return Textile representation of document. pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String @@ -90,14 +91,14 @@ escapeCharForTextile x = case x of escapeStringForTextile :: String -> String escapeStringForTextile = concatMap escapeCharForTextile --- | Convert Pandoc block element to Textile. +-- | Convert Pandoc block element to Textile. blockToTextile :: WriterOptions -- ^ Options -> Block -- ^ Block element - -> State WriterState String + -> State WriterState String blockToTextile _ Null = return "" -blockToTextile opts (Plain inlines) = +blockToTextile opts (Plain inlines) = inlineListToTextile opts inlines blockToTextile opts (Para [Image txt (src,tit)]) = do @@ -236,7 +237,7 @@ listItemToTextile opts items = do -- | Convert definition list item (label, list of blocks) to Textile. definitionListItemToTextile :: WriterOptions - -> ([Inline],[[Block]]) + -> ([Inline],[[Block]]) -> State WriterState String definitionListItemToTextile opts (label, items) = do labelText <- inlineListToTextile opts label @@ -294,8 +295,8 @@ tableRowToTextile opts alignStrings rownum cols' = do 0 -> "header" x | x `rem` 2 == 1 -> "odd" _ -> "even" - cols'' <- sequence $ zipWith - (\alignment item -> tableItemToTextile opts celltype alignment item) + cols'' <- sequence $ zipWith + (\alignment item -> tableItemToTextile opts celltype alignment item) alignStrings cols' return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>" @@ -320,7 +321,7 @@ tableItemToTextile opts celltype align' item = do -- | Convert list of Pandoc block elements to Textile. blockListToTextile :: WriterOptions -- ^ Options -> [Block] -- ^ List of block elements - -> State WriterState String + -> State WriterState String blockListToTextile opts blocks = mapM (blockToTextile opts) blocks >>= return . vcat @@ -332,11 +333,11 @@ inlineListToTextile opts lst = -- | Convert Pandoc inline element to Textile. inlineToTextile :: WriterOptions -> Inline -> State WriterState String -inlineToTextile opts (Emph lst) = do +inlineToTextile opts (Emph lst) = do contents <- inlineListToTextile opts lst return $ if '_' `elem` contents then "<em>" ++ contents ++ "</em>" - else "_" ++ contents ++ "_" + else "_" ++ contents ++ "_" inlineToTextile opts (Strong lst) = do contents <- inlineListToTextile opts lst @@ -377,7 +378,7 @@ inlineToTextile opts (Cite _ lst) = inlineListToTextile opts lst inlineToTextile _ (Code _ str) = return $ if '@' `elem` str then "<tt>" ++ escapeStringForXML str ++ "</tt>" - else "@" ++ str ++ "@" + else "@" ++ str ++ "@" inlineToTextile _ (Str str) = return $ escapeStringForTextile str diff --git a/src/pandoc.hs b/src/pandoc.hs index 2f85906d5..63a0df51a 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -33,7 +33,7 @@ module Main where import Text.Pandoc import Text.Pandoc.PDF (tex2pdf) import Text.Pandoc.Readers.LaTeX (handleIncludes) -import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile, +import Text.Pandoc.Shared ( tabFilter, readDataFile, safeRead, headerShift, findDataFile, normalize, err, warn ) import Text.Pandoc.XML ( toEntities, fromEntities ) import Text.Pandoc.SelfContained ( makeSelfContained ) @@ -44,10 +44,11 @@ import System.Exit ( exitWith, ExitCode (..) ) import System.FilePath import System.Console.GetOpt import Data.Char ( toLower ) -import Data.List ( intercalate, isSuffixOf, isPrefixOf ) +import Data.List ( intercalate, isPrefixOf ) import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable ) import System.IO ( stdout ) import System.IO.Error ( isDoesNotExistError ) +import qualified Control.Exception as E import Control.Exception.Extensible ( throwIO ) import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.CSL as CSL @@ -56,7 +57,7 @@ import Control.Monad (when, unless, liftM) import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B -import Data.ByteString.Lazy.UTF8 (toString ) +import Data.ByteString.Lazy.UTF8 (toString) import Text.CSL.Reference (Reference(..)) #if MIN_VERSION_base(4,4,0) #else @@ -97,8 +98,8 @@ wrapWords indent c = wrap' (c - indent) (c - indent) then ",\n" ++ replicate indent ' ' ++ x ++ wrap' cols (cols - length x) xs else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs -nonTextFormats :: [String] -nonTextFormats = ["odt","docx","epub"] +isTextFormat :: String -> Bool +isTextFormat s = takeWhile (`notElem` "+-") s `notElem` ["odt","docx","epub"] -- | Data structure for command line options. data Opt = Opt @@ -131,7 +132,6 @@ data Opt = Opt , optEPUBFonts :: [FilePath] -- ^ EPUB fonts to embed , optDumpArgs :: Bool -- ^ Output command-line arguments , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments - , optStrict :: Bool -- ^ Use strict markdown syntax , optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , optWrapText :: Bool -- ^ Wrap text , optColumns :: Int -- ^ Line length in characters @@ -184,7 +184,6 @@ defaultOpts = Opt , optEPUBFonts = [] , optDumpArgs = False , optIgnoreArgs = False - , optStrict = False , optReferenceLinks = False , optWrapText = True , optColumns = 72 @@ -235,7 +234,10 @@ options = , Option "" ["strict"] (NoArg - (\opt -> return opt { optStrict = True } )) + (\opt -> do + err 59 $ "The --strict option has been removed.\n" ++ + "Use `markdown_strict' input or output format instead." + return opt )) "" -- "Disable markdown syntax extensions" , Option "R" ["parse-raw"] @@ -257,13 +259,13 @@ options = , Option "" ["base-header-level"] (ReqArg (\arg opt -> - case reads arg of - [(t,"")] | t > 0 -> do + case safeRead arg of + Just t | t > 0 -> do let oldTransforms = optTransforms opt let shift = t - 1 return opt{ optTransforms = headerShift shift : oldTransforms } - _ -> err 19 + _ -> err 19 "base-header-level must be a number > 0") "NUMBER") "" -- "Headers base level" @@ -289,9 +291,9 @@ options = , Option "" ["tab-stop"] (ReqArg (\arg opt -> - case reads arg of - [(t,"")] | t > 0 -> return opt { optTabStop = t } - _ -> err 31 + case safeRead arg of + Just t | t > 0 -> return opt { optTabStop = t } + _ -> err 31 "tab-stop must be a number greater than 0") "NUMBER") "" -- "Tab stop (default 4)" @@ -338,9 +340,9 @@ options = , Option "" ["columns"] (ReqArg (\arg opt -> - case reads arg of - [(t,"")] | t > 0 -> return opt { optColumns = t } - _ -> err 33 $ + case safeRead arg of + Just t | t > 0 -> return opt { optColumns = t } + _ -> err 33 $ "columns must be a number greater than 0") "NUMBER") "" -- "Length of line in characters" @@ -472,10 +474,10 @@ options = , Option "" ["slide-level"] (ReqArg (\arg opt -> do - case reads arg of - [(t,"")] | t >= 1 && t <= 6 -> + case safeRead arg of + Just t | t >= 1 && t <= 6 -> return opt { optSlideLevel = Just t } - _ -> err 39 $ + _ -> err 39 $ "slide level must be a number between 1 and 6") "NUMBER") "" -- "Force header level for slides" @@ -690,12 +692,20 @@ options = ] +readExtension :: String -> IO Extension +readExtension s = case safeRead ('E':'x':'t':'_':map toLower s) of + Just ext -> return ext + Nothing -> err 59 $ "Unknown extension: " ++ s + -- Returns usage message usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++ - (wrapWords 16 78 $ map fst readers) ++ "\nOutput formats: " ++ - (wrapWords 16 78 $ map fst writers ++ nonTextFormats) ++ "\nOptions:") + (wrapWords 16 78 $ readers'names) ++ "\nOutput formats: " ++ + (wrapWords 16 78 $ writers'names) ++ "\nOptions:") + where + writers'names = map fst writers + readers'names = map fst readers -- Determine default reader based on source file extensions defaultReaderName :: String -> [FilePath] -> String @@ -752,6 +762,7 @@ defaultWriterName x = ".org" -> "org" ".asciidoc" -> "asciidoc" ".pdf" -> "latex" + ".fb2" -> "fb2" ['.',y] | y `elem` ['1'..'9'] -> "man" _ -> "html" @@ -771,9 +782,10 @@ main = do ["Try " ++ prg ++ " --help for more information."] let defaultOpts' = if compatMode - then defaultOpts { optReader = "markdown" + then defaultOpts { optReader = "markdown_strict" , optWriter = "html" - , optStrict = True } + , optEmailObfuscation = + ReferenceObfuscation } else defaultOpts -- thread option data structure through all supplied option actions @@ -808,7 +820,6 @@ main = do , optEPUBFonts = epubFonts , optDumpArgs = dumpArgs , optIgnoreArgs = ignoreArgs - , optStrict = strict , optReferenceLinks = referenceLinks , optWrapText = wrap , optColumns = columns @@ -836,9 +847,10 @@ main = do let sources = if ignoreArgs then [] else args datadir <- case mbDataDir of - Nothing -> catch + Nothing -> E.catch (liftM Just $ getAppUserDataDirectory "pandoc") - (const $ return Nothing) + (\e -> let _ = (e :: E.SomeException) + in return Nothing) Just _ -> return mbDataDir -- assign reader and writer based on options and filenames @@ -855,8 +867,8 @@ main = do let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" - let laTeXOutput = writerName' == "latex" || writerName' == "beamer" || - writerName' == "latex+lhs" || writerName' == "beamer+lhs" + let laTeXOutput = "latex" `isPrefixOf` writerName' || + "beamer" `isPrefixOf` writerName' when pdfOutput $ do -- make sure writer is latex or beamer @@ -870,11 +882,11 @@ main = do latexEngine ++ " is needed for pdf output." Just _ -> return () - reader <- case (lookup readerName' readers) of - Just r -> return r - Nothing -> err 7 ("Unknown reader: " ++ readerName') + reader <- case getReader readerName' of + Right r -> return r + Left e -> err 7 e - let standalone' = standalone || writerName' `elem` nonTextFormats || pdfOutput + let standalone' = standalone || not (isTextFormat writerName') || pdfOutput templ <- case templatePath of _ | not standalone' -> return "" @@ -884,26 +896,20 @@ main = do Left e -> throwIO e Right t -> return t Just tp -> do - -- strip off "+lhs" if present - let format = takeWhile (/='+') writerName' + -- strip off extensions + let format = takeWhile (`notElem` "+-") writerName' let tp' = case takeExtension tp of "" -> tp <.> format _ -> tp - catch (UTF8.readFile tp') + E.catch (UTF8.readFile tp') (\e -> if isDoesNotExistError e - then catch + then E.catch (readDataFile datadir $ "templates" </> tp') - (\_ -> throwIO e) + (\e' -> let _ = (e' :: E.SomeException) + in throwIO e') else throwIO e) - let slideVariant = case writerName' of - "s5" -> S5Slides - "slidy" -> SlidySlides - "slideous" -> SlideousSlides - "dzslides" -> DZSlides - _ -> NoSlides - variables' <- case mathMethod of LaTeXMathML Nothing -> do s <- readDataFile datadir $ "data" </> "LaTeXMathML.js" @@ -913,20 +919,22 @@ main = do return $ ("mathml-script", s) : variables _ -> return variables - variables'' <- case slideVariant of - DZSlides -> do + variables'' <- if "dzslides" `isPrefixOf` writerName' + then do dztempl <- readDataFile datadir $ "dzslides" </> "template.html" let dzcore = unlines $ dropWhile (not . isPrefixOf "<!-- {{{{ dzslides core") $ lines dztempl return $ ("dzslides-core", dzcore) : variables' - _ -> return variables' + else return variables' -- unescape reference ids, which may contain XML entities, so -- that we can do lookups with regular string equality let unescapeRefId ref = ref{ refId = fromEntities (refId ref) } - refs <- mapM (\f -> catch (CSL.readBiblioFile f) $ \e -> - err 23 $ "Error reading bibliography `" ++ f ++ "'" ++ "\n" ++ show e) + refs <- mapM (\f -> E.catch (CSL.readBiblioFile f) + (\e -> let _ = (e :: E.SomeException) + in err 23 $ "Error reading bibliography `" ++ f ++ + "'" ++ "\n" ++ show e)) reffiles >>= return . map unescapeRefId . concat @@ -934,62 +942,54 @@ main = do then "." else takeDirectory (head sources) - let startParserState = - defaultParserState { stateParseRaw = parseRaw, - stateTabStop = tabStop, - stateLiterateHaskell = "+lhs" `isSuffixOf` readerName' || - lhsExtension sources, - stateStandalone = standalone', - stateCitations = map CSL.refId refs, - stateSmart = smart || (texLigatures && - (laTeXOutput || writerName' == "context")), - stateOldDashes = oldDashes, - stateColumns = columns, - stateStrict = strict, - stateIndentedCodeClasses = codeBlockClasses, - stateApplyMacros = not laTeXOutput - } - - let writerOptions = defaultWriterOptions - { writerStandalone = standalone', - writerTemplate = templ, - writerVariables = variables'', - writerEPUBMetadata = epubMetadata, - writerTabStop = tabStop, - writerTableOfContents = toc && - writerName' /= "s5", - writerHTMLMathMethod = mathMethod, - writerSlideVariant = slideVariant, - writerIncremental = incremental, - writerCiteMethod = citeMethod, - writerBiblioFiles = reffiles, - writerIgnoreNotes = False, - writerNumberSections = numberSections, - writerSectionDivs = sectionDivs, - writerStrictMarkdown = strict, - writerReferenceLinks = referenceLinks, - writerWrapText = wrap, - writerColumns = columns, - writerLiterateHaskell = False, - writerEmailObfuscation = if strict - then ReferenceObfuscation - else obfuscationMethod, - writerIdentifierPrefix = idPrefix, - writerSourceDirectory = sourceDir, - writerUserDataDir = datadir, - writerHtml5 = html5 || - slideVariant == DZSlides, - writerChapters = chapters, - writerListings = listings, - writerBeamer = False, - writerSlideLevel = slideLevel, - writerHighlight = highlight, - writerHighlightStyle = highlightStyle, - writerSetextHeaders = setextHeaders, - writerTeXLigatures = texLigatures - } - - when (writerName' `elem` nonTextFormats&& outputFile == "-") $ + let readerOpts = def{ readerSmart = smart || (texLigatures && + (laTeXOutput || "context" `isPrefixOf` writerName')) + , readerStandalone = standalone' + , readerParseRaw = parseRaw + , readerColumns = columns + , readerTabStop = tabStop + , readerOldDashes = oldDashes + , readerCitations = map CSL.refId refs + , readerIndentedCodeClasses = codeBlockClasses + , readerApplyMacros = not laTeXOutput + } + + let writerOptions = def { writerStandalone = standalone', + writerTemplate = templ, + writerVariables = variables'', + writerEPUBMetadata = epubMetadata, + writerTabStop = tabStop, + writerTableOfContents = toc, + writerHTMLMathMethod = mathMethod, + writerIncremental = incremental, + writerCiteMethod = citeMethod, + writerBiblioFiles = reffiles, + writerIgnoreNotes = False, + writerNumberSections = numberSections, + writerSectionDivs = sectionDivs, + writerReferenceLinks = referenceLinks, + writerWrapText = wrap, + writerColumns = columns, + writerEmailObfuscation = obfuscationMethod, + writerIdentifierPrefix = idPrefix, + writerSourceDirectory = sourceDir, + writerUserDataDir = datadir, + writerHtml5 = html5, + writerChapters = chapters, + writerListings = listings, + writerBeamer = False, + writerSlideLevel = slideLevel, + writerHighlight = highlight, + writerHighlightStyle = highlightStyle, + writerSetextHeaders = setextHeaders, + writerTeXLigatures = texLigatures, + writerEpubStylesheet = epubStylesheet, + writerEpubFonts = epubFonts, + writerReferenceODT = referenceODT, + writerReferenceDocx = referenceDocx + } + + when (not (isTextFormat writerName') && outputFile == "-") $ err 5 $ "Cannot write " ++ writerName' ++ " output to stdout.\n" ++ "Specify an output file using the -o option." @@ -1009,12 +1009,12 @@ main = do then handleIncludes else return - doc <- (reader startParserState) `fmap` (readSources sources >>= + doc <- (reader readerOpts) `fmap` (readSources sources >>= handleIncludes' . convertTabs . intercalate "\n") let doc0 = foldr ($) doc transforms - doc1 <- if writerName' == "rtf" + doc1 <- if "rtf" `isPrefixOf` writerName' then bottomUpM rtfEmbedImage doc0 else return doc0 @@ -1042,31 +1042,25 @@ main = do writerFn "-" = UTF8.putStr writerFn f = UTF8.writeFile f - case lookup writerName' writers of - Nothing - | writerName' == "epub" -> - writeEPUB epubStylesheet epubFonts writerOptions doc2 - >>= writeBinary - | writerName' == "odt" -> - writeODT referenceODT writerOptions doc2 >>= writeBinary - | writerName' == "docx" -> - writeDocx referenceDocx writerOptions doc2 >>= writeBinary - | otherwise -> err 9 ("Unknown writer: " ++ writerName') - Just w - | pdfOutput -> do - res <- tex2pdf latexEngine $ w writerOptions doc2 + case getWriter writerName' of + Left e -> err 9 e + Right (IOStringWriter f) -> f writerOptions doc2 >>= writerFn outputFile + Right (IOByteStringWriter f) -> f writerOptions doc2 >>= writeBinary + Right (PureStringWriter f) + | pdfOutput -> do + res <- tex2pdf latexEngine $ f writerOptions doc2 case res of Right pdf -> writeBinary pdf Left err' -> err 43 $ toString err' - Just w - | htmlFormat && ascii -> - writerFn outputFile =<< selfcontain (toEntities result) - | otherwise -> - writerFn outputFile =<< selfcontain result - where result = w writerOptions doc2 ++ ['\n' | not standalone'] - htmlFormat = writerName' `elem` + | otherwise -> selfcontain (f writerOptions doc2 ++ + ['\n' | not standalone']) + >>= writerFn outputFile . handleEntities + where htmlFormat = writerName' `elem` ["html","html+lhs","html5","html5+lhs", "s5","slidy","slideous","dzslides"] selfcontain = if selfContained && htmlFormat then makeSelfContained datadir else return + handleEntities = if htmlFormat && ascii + then toEntities + else id diff --git a/src/test-pandoc.hs b/src/test-pandoc.hs deleted file mode 100644 index 1a8c05e14..000000000 --- a/src/test-pandoc.hs +++ /dev/null @@ -1,36 +0,0 @@ -{-# OPTIONS_GHC -Wall #-} - -module Main where - -import Test.Framework - -import qualified Tests.Old -import qualified Tests.Readers.LaTeX -import qualified Tests.Readers.Markdown -import qualified Tests.Readers.RST -import qualified Tests.Writers.ConTeXt -import qualified Tests.Writers.LaTeX -import qualified Tests.Writers.HTML -import qualified Tests.Writers.Native -import qualified Tests.Writers.Markdown -import qualified Tests.Shared - -tests :: [Test] -tests = [ testGroup "Old" Tests.Old.tests - , testGroup "Shared" Tests.Shared.tests - , testGroup "Writers" - [ testGroup "Native" Tests.Writers.Native.tests - , testGroup "ConTeXt" Tests.Writers.ConTeXt.tests - , testGroup "LaTeX" Tests.Writers.LaTeX.tests - , testGroup "HTML" Tests.Writers.HTML.tests - , testGroup "Markdown" Tests.Writers.Markdown.tests - ] - , testGroup "Readers" - [ testGroup "LaTeX" Tests.Readers.LaTeX.tests - , testGroup "Markdown" Tests.Readers.Markdown.tests - , testGroup "RST" Tests.Readers.RST.tests - ] - ] - -main :: IO () -main = defaultMain tests |