diff options
Diffstat (limited to 'src')
28 files changed, 1782 insertions, 1177 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index b3a7fddbc..6cb8130a4 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -71,6 +71,7 @@ module Text.Pandoc , NoteTable , HeaderType (..) -- * Writers: converting /from/ Pandoc format + , writeNative , writeMarkdown , writePlain , writeRST @@ -79,17 +80,17 @@ module Text.Pandoc , writeTexinfo , writeHtml , writeHtmlString - , writeS5 - , writeS5String , writeDocbook , writeOpenDocument , writeMan , writeMediaWiki , writeTextile , writeRTF - , prettyPandoc + , writeODT + , writeEPUB -- * Writer options used in writers , WriterOptions (..) + , HTMLSlideVariant (..) , HTMLMathMethod (..) , defaultWriterOptions -- * Rendering templates and default templates @@ -103,13 +104,15 @@ import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.RST import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.HTML +import Text.Pandoc.Writers.Native import Text.Pandoc.Writers.Markdown import Text.Pandoc.Writers.RST import Text.Pandoc.Writers.LaTeX import Text.Pandoc.Writers.ConTeXt import Text.Pandoc.Writers.Texinfo import Text.Pandoc.Writers.HTML -import Text.Pandoc.Writers.S5 +import Text.Pandoc.Writers.ODT +import Text.Pandoc.Writers.EPUB import Text.Pandoc.Writers.Docbook import Text.Pandoc.Writers.OpenDocument import Text.Pandoc.Writers.Man @@ -117,6 +120,7 @@ import Text.Pandoc.Writers.RTF import Text.Pandoc.Writers.MediaWiki import Text.Pandoc.Writers.Textile import Text.Pandoc.Templates +import Text.Pandoc.Parsing import Text.Pandoc.Shared import Data.Version (showVersion) import Paths_pandoc (version) diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs index 7ddd26625..fffca3b2e 100644 --- a/src/Text/Pandoc/Definition.hs +++ b/src/Text/Pandoc/Definition.hs @@ -52,6 +52,7 @@ type ListAttributes = (Int, ListNumberStyle, ListNumberDelim) -- | Style of list numbers. data ListNumberStyle = DefaultStyle + | Example | Decimal | LowerRoman | UpperRoman diff --git a/src/Text/Pandoc/ODT.hs b/src/Text/Pandoc/ODT.hs deleted file mode 100644 index a69d9d4e4..000000000 --- a/src/Text/Pandoc/ODT.hs +++ /dev/null @@ -1,101 +0,0 @@ -{- -Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.ODT - Copyright : Copyright (C) 2008-2010 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Functions for producing an ODT file from OpenDocument XML. --} -module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where -import Data.List ( find ) -import System.FilePath ( (</>), takeFileName ) -import qualified Data.ByteString.Lazy as B -import Data.ByteString.Lazy.UTF8 ( fromString ) -import Codec.Archive.Zip -import Control.Applicative ( (<$>) ) -import Text.ParserCombinators.Parsec -import System.Time -import Paths_pandoc ( getDataFileName ) -import System.Directory -import Control.Monad (liftM) - --- | Produce an ODT file from OpenDocument XML. -saveOpenDocumentAsODT :: Maybe FilePath -- ^ Path of user data directory - -> FilePath -- ^ Pathname of ODT file to be produced - -> FilePath -- ^ Relative directory of source file - -> Maybe FilePath -- ^ Path specified by --reference-odt - -> String -- ^ OpenDocument XML contents - -> IO () -saveOpenDocumentAsODT datadir destinationODTPath sourceDirRelative mbRefOdt xml = do - refArchive <- liftM toArchive $ - case mbRefOdt of - Just f -> B.readFile f - Nothing -> do - let defaultODT = getDataFileName "reference.odt" >>= B.readFile - case datadir of - Nothing -> defaultODT - Just d -> do - exists <- doesFileExist (d </> "reference.odt") - if exists - then B.readFile (d </> "reference.odt") - else defaultODT - -- handle pictures - let (newContents, pics) = - case runParser pPictures [] "OpenDocument XML contents" xml of - Left err -> error $ show err - Right x -> x - picEntries <- mapM (makePictureEntry sourceDirRelative) pics - (TOD epochTime _) <- getClockTime - let contentEntry = toEntry "content.xml" epochTime $ fromString newContents - let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries) - B.writeFile destinationODTPath $ fromArchive archive - -makePictureEntry :: FilePath -- ^ Relative directory of source file - -> (FilePath, String) -- ^ Path and new path of picture - -> IO Entry -makePictureEntry sourceDirRelative (path, newPath) = do - entry <- readEntry [] $ sourceDirRelative </> path - return (entry { eRelativePath = newPath }) - -pPictures :: GenParser Char [(FilePath, String)] ([Char], [(FilePath, String)]) -pPictures = do - contents <- concat <$> many (pPicture <|> many1 (noneOf "<") <|> string "<") - pics <- getState - return (contents, pics) - -pPicture :: GenParser Char [(FilePath, String)] [Char] -pPicture = try $ do - string "<draw:image xlink:href=\"" - path <- manyTill anyChar (char '"') - let filename = takeFileName path - pics <- getState - newPath <- case find (\(o, _) -> o == path) pics of - Just (_, new) -> return new - Nothing -> do - -- get a unique name - let dups = length $ (filter (\(o, _) -> takeFileName o == filename)) pics - let new = "Pictures/" ++ replicate dups '0' ++ filename - updateState ((path, new) :) - return new - return $ "<draw:image xlink:href=\"" ++ newPath ++ "\"" diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs new file mode 100644 index 000000000..dce99fd75 --- /dev/null +++ b/src/Text/Pandoc/Parsing.hs @@ -0,0 +1,705 @@ +{- +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Parsing + Copyright : Copyright (C) 2006-2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +A utility library with parsers used in pandoc readers. +-} +module Text.Pandoc.Parsing ( (>>~), + anyLine, + many1Till, + notFollowedBy', + oneOfStrings, + spaceChar, + skipSpaces, + blankline, + blanklines, + enclosed, + stringAnyCase, + parseFromString, + lineClump, + charsInBalanced, + charsInBalanced', + romanNumeral, + emailAddress, + uri, + withHorizDisplacement, + nullBlock, + failIfStrict, + failUnlessLHS, + escaped, + anyOrderedListMarker, + orderedListMarker, + charRef, + tableWith, + gridTableWith, + readWith, + testStringWith, + ParserState (..), + defaultParserState, + HeaderType (..), + ParserContext (..), + QuoteContext (..), + NoteTable, + KeyTable, + Key (..), + lookupKeySrc, + refsMatch ) +where + +import Text.Pandoc.Definition +import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) +import Text.ParserCombinators.Parsec +import Text.Pandoc.CharacterReferences ( characterReference ) +import Data.Char ( toLower, toUpper, ord, isAscii ) +import Data.List ( intercalate, transpose ) +import Network.URI ( parseURI, URI (..), isAllowedInURI ) +import Control.Monad ( join, liftM ) +import Text.Pandoc.Shared +import qualified Data.Map as M +import Text.TeXMath.Macros (Macro) + +-- | Like >>, but returns the operation on the left. +-- (Suggested by Tillmann Rendel on Haskell-cafe list.) +(>>~) :: (Monad m) => m a -> m b -> m a +a >>~ b = a >>= \x -> b >> return x + +-- | Parse any line of text +anyLine :: GenParser Char st [Char] +anyLine = manyTill anyChar newline + +-- | Like @manyTill@, but reads at least one item. +many1Till :: GenParser tok st a + -> GenParser tok st end + -> GenParser tok st [a] +many1Till p end = do + first <- p + rest <- manyTill p end + return (first:rest) + +-- | A more general form of @notFollowedBy@. This one allows any +-- type of parser to be specified, and succeeds only if that parser fails. +-- It does not consume any input. +notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () +notFollowedBy' p = try $ join $ do a <- try p + return (unexpected (show a)) + <|> + return (return ()) +-- (This version due to Andrew Pimlott on the Haskell mailing list.) + +-- | Parses one of a list of strings (tried in order). +oneOfStrings :: [String] -> GenParser Char st String +oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings + +-- | Parses a space or tab. +spaceChar :: CharParser st Char +spaceChar = char ' ' <|> char '\t' + +-- | Skips zero or more spaces or tabs. +skipSpaces :: GenParser Char st () +skipSpaces = skipMany spaceChar + +-- | Skips zero or more spaces or tabs, then reads a newline. +blankline :: GenParser Char st Char +blankline = try $ skipSpaces >> newline + +-- | Parses one or more blank lines and returns a string of newlines. +blanklines :: GenParser Char st [Char] +blanklines = many1 blankline + +-- | Parses material enclosed between start and end parsers. +enclosed :: GenParser Char st t -- ^ start parser + -> GenParser Char st end -- ^ end parser + -> GenParser Char st a -- ^ content parser (to be used repeatedly) + -> GenParser Char st [a] +enclosed start end parser = try $ + start >> notFollowedBy space >> many1Till parser end + +-- | Parse string, case insensitive. +stringAnyCase :: [Char] -> CharParser st String +stringAnyCase [] = string "" +stringAnyCase (x:xs) = do + firstChar <- char (toUpper x) <|> char (toLower x) + rest <- stringAnyCase xs + return (firstChar:rest) + +-- | Parse contents of 'str' using 'parser' and return result. +parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a +parseFromString parser str = do + oldPos <- getPosition + oldInput <- getInput + setInput str + result <- parser + setInput oldInput + setPosition oldPos + return result + +-- | Parse raw line block up to and including blank lines. +lineClump :: GenParser Char st String +lineClump = blanklines + <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) + +-- | Parse a string of characters between an open character +-- and a close character, including text between balanced +-- pairs of open and close, which must be different. For example, +-- @charsInBalanced '(' ')'@ will parse "(hello (there))" +-- and return "hello (there)". Stop if a blank line is +-- encountered. +charsInBalanced :: Char -> Char -> GenParser Char st String +charsInBalanced open close = try $ do + char open + raw <- many $ (many1 (noneOf [open, close, '\n'])) + <|> (do res <- charsInBalanced open close + return $ [open] ++ res ++ [close]) + <|> try (string "\n" >>~ notFollowedBy' blanklines) + char close + return $ concat raw + +-- | Like @charsInBalanced@, but allow blank lines in the content. +charsInBalanced' :: Char -> Char -> GenParser Char st String +charsInBalanced' open close = try $ do + char open + raw <- many $ (many1 (noneOf [open, close])) + <|> (do res <- charsInBalanced' open close + return $ [open] ++ res ++ [close]) + char close + return $ concat raw + +-- Auxiliary functions for romanNumeral: + +lowercaseRomanDigits :: [Char] +lowercaseRomanDigits = ['i','v','x','l','c','d','m'] + +uppercaseRomanDigits :: [Char] +uppercaseRomanDigits = map toUpper lowercaseRomanDigits + +-- | Parses a roman numeral (uppercase or lowercase), returns number. +romanNumeral :: Bool -- ^ Uppercase if true + -> GenParser Char st Int +romanNumeral upperCase = do + let romanDigits = if upperCase + then uppercaseRomanDigits + else lowercaseRomanDigits + lookAhead $ oneOf romanDigits + let [one, five, ten, fifty, hundred, fivehundred, thousand] = + map char romanDigits + thousands <- many thousand >>= (return . (1000 *) . length) + ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 + fivehundreds <- many fivehundred >>= (return . (500 *) . length) + fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400 + hundreds <- many hundred >>= (return . (100 *) . length) + nineties <- option 0 $ try $ ten >> hundred >> return 90 + fifties <- many fifty >>= (return . (50 *) . length) + forties <- option 0 $ try $ ten >> fifty >> return 40 + tens <- many ten >>= (return . (10 *) . length) + nines <- option 0 $ try $ one >> ten >> return 9 + fives <- many five >>= (return . (5 *) . length) + fours <- option 0 $ try $ one >> five >> return 4 + ones <- many one >>= (return . length) + let total = thousands + ninehundreds + fivehundreds + fourhundreds + + hundreds + nineties + fifties + forties + tens + nines + + fives + fours + ones + if total == 0 + then fail "not a roman numeral" + else return total + +-- Parsers for email addresses and URIs + +emailChar :: GenParser Char st Char +emailChar = alphaNum <|> oneOf "-+_." + +domainChar :: GenParser Char st Char +domainChar = alphaNum <|> char '-' + +domain :: GenParser Char st [Char] +domain = do + first <- many1 domainChar + dom <- many1 $ try (char '.' >> many1 domainChar ) + return $ intercalate "." (first:dom) + +-- | Parses an email address; returns original and corresponding +-- escaped mailto: URI. +emailAddress :: GenParser Char st (String, String) +emailAddress = try $ do + firstLetter <- alphaNum + restAddr <- many emailChar + let addr = firstLetter:restAddr + char '@' + dom <- domain + let full = addr ++ '@':dom + return (full, escapeURI $ "mailto:" ++ full) + +-- | Parses a URI. Returns pair of original and URI-escaped version. +uri :: GenParser Char st (String, String) +uri = try $ do + let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:", + "news:", "telnet:" ] + lookAhead $ oneOfStrings protocols + -- scan non-ascii characters and ascii characters allowed in a URI + str <- many1 $ satisfy (\c -> not (isAscii c) || isAllowedInURI c) + -- now see if they amount to an absolute URI + case parseURI (escapeURI str) of + Just uri' -> if uriScheme uri' `elem` protocols + then return (str, show uri') + else fail "not a URI" + Nothing -> fail "not a URI" + +-- | Applies a parser, returns tuple of its results and its horizontal +-- displacement (the difference between the source column at the end +-- and the source column at the beginning). Vertical displacement +-- (source row) is ignored. +withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply + -> GenParser Char st (a, Int) -- ^ (result, displacement) +withHorizDisplacement parser = do + pos1 <- getPosition + result <- parser + pos2 <- getPosition + return (result, sourceColumn pos2 - sourceColumn pos1) + +-- | Parses a character and returns 'Null' (so that the parser can move on +-- if it gets stuck). +nullBlock :: GenParser Char st Block +nullBlock = anyChar >> return Null + +-- | Fail if reader is in strict markdown syntax mode. +failIfStrict :: GenParser Char ParserState () +failIfStrict = do + state <- getState + if stateStrict state then fail "strict mode" else return () + +-- | Fail unless we're in literate haskell mode. +failUnlessLHS :: GenParser tok ParserState () +failUnlessLHS = do + state <- getState + if stateLiterateHaskell state then return () else fail "Literate haskell feature" + +-- | Parses backslash, then applies character parser. +escaped :: GenParser Char st Char -- ^ Parser for character to escape + -> GenParser Char st Inline +escaped parser = try $ do + char '\\' + result <- parser + return (Str [result]) + +-- | Parses an uppercase roman numeral and returns (UpperRoman, number). +upperRoman :: GenParser Char st (ListNumberStyle, Int) +upperRoman = do + num <- romanNumeral True + return (UpperRoman, num) + +-- | Parses a lowercase roman numeral and returns (LowerRoman, number). +lowerRoman :: GenParser Char st (ListNumberStyle, Int) +lowerRoman = do + num <- romanNumeral False + return (LowerRoman, num) + +-- | Parses a decimal numeral and returns (Decimal, number). +decimal :: GenParser Char st (ListNumberStyle, Int) +decimal = do + num <- many1 digit + return (Decimal, read num) + +-- | Parses a '@' and optional label and +-- returns (DefaultStyle, [next example number]). The next +-- example number is incremented in parser state, and the label +-- (if present) is added to the label table. +exampleNum :: GenParser Char ParserState (ListNumberStyle, Int) +exampleNum = do + char '@' + lab <- many (alphaNum <|> oneOf "_-") + st <- getState + let num = stateNextExample st + let newlabels = if null lab + then stateExamples st + else M.insert lab num $ stateExamples st + updateState $ \s -> s{ stateNextExample = num + 1 + , stateExamples = newlabels } + return (Example, num) + +-- | Parses a '#' returns (DefaultStyle, 1). +defaultNum :: GenParser Char st (ListNumberStyle, Int) +defaultNum = do + char '#' + return (DefaultStyle, 1) + +-- | Parses a lowercase letter and returns (LowerAlpha, number). +lowerAlpha :: GenParser Char st (ListNumberStyle, Int) +lowerAlpha = do + ch <- oneOf ['a'..'z'] + return (LowerAlpha, ord ch - ord 'a' + 1) + +-- | Parses an uppercase letter and returns (UpperAlpha, number). +upperAlpha :: GenParser Char st (ListNumberStyle, Int) +upperAlpha = do + ch <- oneOf ['A'..'Z'] + return (UpperAlpha, ord ch - ord 'A' + 1) + +-- | Parses a roman numeral i or I +romanOne :: GenParser Char st (ListNumberStyle, Int) +romanOne = (char 'i' >> return (LowerRoman, 1)) <|> + (char 'I' >> return (UpperRoman, 1)) + +-- | Parses an ordered list marker and returns list attributes. +anyOrderedListMarker :: GenParser Char ParserState ListAttributes +anyOrderedListMarker = choice $ + [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], + numParser <- [decimal, exampleNum, defaultNum, romanOne, + lowerAlpha, lowerRoman, upperAlpha, upperRoman]] + +-- | Parses a list number (num) followed by a period, returns list attributes. +inPeriod :: GenParser Char st (ListNumberStyle, Int) + -> GenParser Char st ListAttributes +inPeriod num = try $ do + (style, start) <- num + char '.' + let delim = if style == DefaultStyle + then DefaultDelim + else Period + return (start, style, delim) + +-- | Parses a list number (num) followed by a paren, returns list attributes. +inOneParen :: GenParser Char st (ListNumberStyle, Int) + -> GenParser Char st ListAttributes +inOneParen num = try $ do + (style, start) <- num + char ')' + return (start, style, OneParen) + +-- | Parses a list number (num) enclosed in parens, returns list attributes. +inTwoParens :: GenParser Char st (ListNumberStyle, Int) + -> GenParser Char st ListAttributes +inTwoParens num = try $ do + char '(' + (style, start) <- num + char ')' + return (start, style, TwoParens) + +-- | Parses an ordered list marker with a given style and delimiter, +-- returns number. +orderedListMarker :: ListNumberStyle + -> ListNumberDelim + -> GenParser Char ParserState Int +orderedListMarker style delim = do + let num = defaultNum <|> -- # can continue any kind of list + case style of + DefaultStyle -> decimal + Example -> exampleNum + Decimal -> decimal + UpperRoman -> upperRoman + LowerRoman -> lowerRoman + UpperAlpha -> upperAlpha + LowerAlpha -> lowerAlpha + let context = case delim of + DefaultDelim -> inPeriod + Period -> inPeriod + OneParen -> inOneParen + TwoParens -> inTwoParens + (start, _, _) <- context num + return start + +-- | Parses a character reference and returns a Str element. +charRef :: GenParser Char st Inline +charRef = do + c <- characterReference + return $ Str [c] + +-- | Parse a table using 'headerParser', 'rowParser', +-- 'lineParser', and 'footerParser'. +tableWith :: GenParser Char ParserState ([[Block]], [Alignment], [Int]) + -> ([Int] -> GenParser Char ParserState [[Block]]) + -> GenParser Char ParserState sep + -> GenParser Char ParserState end + -> GenParser Char ParserState [Inline] + -> GenParser Char ParserState Block +tableWith headerParser rowParser lineParser footerParser captionParser = try $ do + caption' <- option [] captionParser + (heads, aligns, indices) <- headerParser + lines' <- rowParser indices `sepEndBy` lineParser + footerParser + caption <- if null caption' + then option [] captionParser + else return caption' + state <- getState + let numColumns = stateColumns state + let widths = widthsFromIndices numColumns indices + return $ Table caption aligns widths heads lines' + +-- Calculate relative widths of table columns, based on indices +widthsFromIndices :: Int -- Number of columns on terminal + -> [Int] -- Indices + -> [Double] -- Fractional relative sizes of columns +widthsFromIndices _ [] = [] +widthsFromIndices numColumns indices = + let lengths' = zipWith (-) indices (0:indices) + lengths = reverse $ + case reverse lengths' of + [] -> [] + [x] -> [x] + -- compensate for the fact that intercolumn + -- spaces are counted in widths of all columns + -- but the last... + (x:y:zs) -> if x < y && y - x <= 2 + then y:y:zs + else x:y:zs + totLength = sum lengths + quotient = if totLength > numColumns + then fromIntegral totLength + else fromIntegral numColumns + fracs = map (\l -> (fromIntegral l) / quotient) lengths in + tail fracs + +-- Parse a grid table: starts with row of '-' on top, then header +-- (which may be grid), then the rows, +-- which may be grid, separated by blank lines, and +-- ending with a footer (dashed line followed by blank line). +gridTableWith :: GenParser Char ParserState Block -- ^ Block parser + -> GenParser Char ParserState [Inline] -- ^ Caption parser + -> Bool -- ^ Headerless table + -> GenParser Char ParserState Block +gridTableWith block tableCaption headless = + tableWith (gridTableHeader headless block) (gridTableRow block) (gridTableSep '-') gridTableFooter tableCaption + +gridTableSplitLine :: [Int] -> String -> [String] +gridTableSplitLine indices line = + map removeFinalBar $ tail $ splitByIndices (init indices) line + +gridPart :: Char -> GenParser Char st (Int, Int) +gridPart ch = do + dashes <- many1 (char ch) + char '+' + return (length dashes, length dashes + 1) + +gridDashedLines :: Char -> GenParser Char st [(Int,Int)] +gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline + +removeFinalBar :: String -> String +removeFinalBar = reverse . dropWhile (=='|') . dropWhile (`elem` " \t") . + reverse + +-- | Separator between rows of grid table. +gridTableSep :: Char -> GenParser Char ParserState Char +gridTableSep ch = try $ gridDashedLines ch >> return '\n' + +-- | Parse header for a grid table. +gridTableHeader :: Bool -- ^ Headerless table + -> GenParser Char ParserState Block + -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) +gridTableHeader headless block = try $ do + optional blanklines + dashes <- gridDashedLines '-' + rawContent <- if headless + then return $ repeat "" + else many1 + (notFollowedBy (gridTableSep '=') >> char '|' >> + many1Till anyChar newline) + if headless + then return () + else gridTableSep '=' >> return () + let lines' = map snd dashes + let indices = scanl (+) 0 lines' + let aligns = replicate (length lines') AlignDefault + -- RST does not have a notion of alignments + let rawHeads = if headless + then replicate (length dashes) "" + else map (intercalate " ") $ transpose + $ map (gridTableSplitLine indices) rawContent + heads <- mapM (parseFromString $ many block) $ + map removeLeadingTrailingSpace rawHeads + return (heads, aligns, indices) + +gridTableRawLine :: [Int] -> GenParser Char ParserState [String] +gridTableRawLine indices = do + char '|' + line <- many1Till anyChar newline + return (gridTableSplitLine indices $ removeTrailingSpace line) + +-- | Parse row of grid table. +gridTableRow :: GenParser Char ParserState Block + -> [Int] + -> GenParser Char ParserState [[Block]] +gridTableRow block indices = do + colLines <- many1 (gridTableRawLine indices) + let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ + transpose colLines + mapM (liftM compactifyCell . parseFromString (many block)) cols + +removeOneLeadingSpace :: [String] -> [String] +removeOneLeadingSpace xs = + if all startsWithSpace xs + then map (drop 1) xs + else xs + where startsWithSpace "" = True + startsWithSpace (y:_) = y == ' ' + +compactifyCell :: [Block] -> [Block] +compactifyCell bs = head $ compactify [bs] + +-- | Parse footer for a grid table. +gridTableFooter :: GenParser Char ParserState [Char] +gridTableFooter = blanklines + +--- + +-- | Parse a string with a given parser and state. +readWith :: GenParser Char ParserState a -- ^ parser + -> ParserState -- ^ initial state + -> String -- ^ input string + -> a +readWith parser state input = + case runParser parser state "source" input of + Left err -> error $ "\nError:\n" ++ show err + Right result -> result + +-- | Parse a string with @parser@ (for testing). +testStringWith :: (Show a) => GenParser Char ParserState a + -> String + -> IO () +testStringWith parser str = UTF8.putStrLn $ show $ + readWith parser defaultParserState str + +-- | Parsing options. +data ParserState = ParserState + { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? + stateParserContext :: ParserContext, -- ^ Inside list? + stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? + stateSanitizeHTML :: Bool, -- ^ Sanitize HTML? + stateKeys :: KeyTable, -- ^ List of reference keys +#ifdef _CITEPROC + stateCitations :: [String], -- ^ List of available citations +#endif + stateNotes :: NoteTable, -- ^ List of notes + stateTabStop :: Int, -- ^ Tab stop + stateStandalone :: Bool, -- ^ Parse bibliographic info? + stateTitle :: [Inline], -- ^ Title of document + stateAuthors :: [[Inline]], -- ^ Authors of document + stateDate :: [Inline], -- ^ Date of document + stateStrict :: Bool, -- ^ Use strict markdown syntax? + stateSmart :: Bool, -- ^ Use smart typography? + stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell + stateColumns :: Int, -- ^ Number of columns in terminal + stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used + stateIndentedCodeClasses :: [String], -- ^ Classes to use for indented code blocks + stateNextExample :: Int, -- ^ Number of next example + stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers + stateHasChapters :: Bool, -- ^ True if \chapter encountered + stateApplyMacros :: Bool, -- ^ Apply LaTeX macros? + stateMacros :: [Macro] -- ^ List of macros defined so far + } + deriving Show + +defaultParserState :: ParserState +defaultParserState = + ParserState { stateParseRaw = False, + stateParserContext = NullState, + stateQuoteContext = NoQuote, + stateSanitizeHTML = False, + stateKeys = M.empty, +#ifdef _CITEPROC + stateCitations = [], +#endif + stateNotes = [], + stateTabStop = 4, + stateStandalone = False, + stateTitle = [], + stateAuthors = [], + stateDate = [], + stateStrict = False, + stateSmart = False, + stateLiterateHaskell = False, + stateColumns = 80, + stateHeaderTable = [], + stateIndentedCodeClasses = [], + stateNextExample = 1, + stateExamples = M.empty, + stateHasChapters = False, + stateApplyMacros = True, + stateMacros = []} + +data HeaderType + = SingleHeader Char -- ^ Single line of characters underneath + | DoubleHeader Char -- ^ Lines of characters above and below + deriving (Eq, Show) + +data ParserContext + = ListItemState -- ^ Used when running parser on list item contents + | NullState -- ^ Default state + deriving (Eq, Show) + +data QuoteContext + = InSingleQuote -- ^ Used when parsing inside single quotes + | InDoubleQuote -- ^ Used when parsing inside double quotes + | NoQuote -- ^ Used when not parsing inside quotes + deriving (Eq, Show) + +type NoteTable = [(String, String)] + +newtype Key = Key [Inline] deriving (Show, Read) + +instance Eq Key where + Key a == Key b = refsMatch a b + +instance Ord Key where + compare (Key a) (Key b) = if a == b then EQ else compare a b + +type KeyTable = M.Map Key Target + +-- | Look up key in key table and return target object. +lookupKeySrc :: KeyTable -- ^ Key table + -> Key -- ^ Key + -> Maybe Target +lookupKeySrc table key = case M.lookup key table of + Nothing -> Nothing + Just src -> Just src + +-- | Returns @True@ if keys match (case insensitive). +refsMatch :: [Inline] -> [Inline] -> Bool +refsMatch ((Str x):restx) ((Str y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((Emph x):restx) ((Emph y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Strong x):restx) ((Strong y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Strikeout x):restx) ((Strikeout y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Superscript x):restx) ((Superscript y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Subscript x):restx) ((Subscript y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((SmallCaps x):restx) ((SmallCaps y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Quoted t x):restx) ((Quoted u y):resty) = + t == u && refsMatch x y && refsMatch restx resty +refsMatch ((Code x):restx) ((Code y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((Math t x):restx) ((Math u y):resty) = + ((map toLower x) == (map toLower y)) && t == u && refsMatch restx resty +refsMatch ((TeX x):restx) ((TeX y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty +refsMatch [] x = null x +refsMatch x [] = null x + diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 5c188e3d9..5ccbc4fb1 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -44,13 +44,14 @@ module Text.Pandoc.Readers.HTML ( import Text.ParserCombinators.Parsec import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Parsing import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Data.Maybe ( fromMaybe ) import Data.List ( isPrefixOf, isSuffixOf, intercalate ) import Data.Char ( toLower, isAlphaNum ) import Network.URI ( parseURIReference, URI (..) ) -import Control.Monad ( liftM ) +import Control.Monad ( liftM, when ) -- | Convert HTML-formatted string to 'Pandoc' document. readHtml :: ParserState -- ^ Parser state @@ -198,11 +199,11 @@ inlinesTilEnd tag = manyTill inline (htmlEndTag tag) -- | Parse blocks between open and close tag. blocksIn :: String -> GenParser Char ParserState [Block] -blocksIn tag = try $ htmlTag tag >> spaces >> blocksTilEnd tag +blocksIn tag = try $ htmlOpenTag tag >> spaces >> blocksTilEnd tag -- | Parse inlines between open and close tag. inlinesIn :: String -> GenParser Char ParserState [Inline] -inlinesIn tag = try $ htmlTag tag >> spaces >> inlinesTilEnd tag +inlinesIn tag = try $ htmlOpenTag tag >> spaces >> inlinesTilEnd tag -- | Extract type from a tag: e.g. @br@ from @\<br\>@ extractTagType :: String -> String @@ -258,18 +259,33 @@ anyHtmlEndTag = try $ do then return $ "<!-- unsafe HTML removed -->" else return result -htmlTag :: String -> GenParser Char ParserState (String, [(String, String)]) -htmlTag tag = try $ do +htmlTag :: Bool + -> String + -> GenParser Char ParserState (String, [(String, String)]) +htmlTag selfClosing tag = try $ do char '<' spaces stringAnyCase tag attribs <- many htmlAttribute spaces - optional (string "/") - spaces + -- note: we want to handle both HTML and XHTML, + -- so we don't require the / + when selfClosing $ optional $ char '/' >> spaces char '>' return (tag, (map (\(name, content, _) -> (name, content)) attribs)) +htmlOpenTag :: String + -> GenParser Char ParserState (String, [(String, String)]) +htmlOpenTag = htmlTag False + +htmlCloseTag :: String + -> GenParser Char ParserState (String, [(String, String)]) +htmlCloseTag = htmlTag False . ('/':) + +htmlSelfClosingTag :: String + -> GenParser Char ParserState (String, [(String, String)]) +htmlSelfClosingTag = htmlTag True + -- parses a quoted html attribute value quoted :: Char -> GenParser Char st (String, String) quoted quoteChar = do @@ -344,7 +360,7 @@ anyHtmlInlineTag = try $ do -- Scripts must be treated differently, because they can contain '<>' etc. htmlScript :: GenParser Char ParserState [Char] htmlScript = try $ do - lookAhead $ htmlTag "script" + lookAhead $ htmlOpenTag "script" open <- anyHtmlTag rest <- liftM concat $ manyTill scriptChunk (htmlEndTag "script") st <- getState @@ -379,7 +395,7 @@ scriptChunk = jsComment <|> jsString <|> jsChars -- Style tags must be treated differently, because they can contain CSS htmlStyle :: GenParser Char ParserState [Char] htmlStyle = try $ do - lookAhead $ htmlTag "style" + lookAhead $ htmlOpenTag "style" open <- anyHtmlTag rest <- manyTill anyChar (htmlEndTag "style") st <- getState @@ -411,7 +427,8 @@ rawVerbatimBlock = try $ do -- We don't want to parse </body> or </html> as raw HTML, since these -- are handled in parseHtml. rawHtmlBlock' :: GenParser Char ParserState Block -rawHtmlBlock' = do notFollowedBy' (htmlTag "/body" <|> htmlTag "/html") +rawHtmlBlock' = do notFollowedBy' (htmlCloseTag "body" <|> + htmlCloseTag "html") rawHtmlBlock -- | Parses an HTML comment. @@ -441,13 +458,13 @@ definition = try $ do nonTitleNonHead :: GenParser Char ParserState Char nonTitleNonHead = try $ do - notFollowedBy $ (htmlTag "title" >> return ' ') <|> + notFollowedBy $ (htmlOpenTag "title" >> return ' ') <|> (htmlEndTag "head" >> return ' ') (rawHtmlBlock >> return ' ') <|> anyChar parseTitle :: GenParser Char ParserState [Inline] parseTitle = try $ do - (tag, _) <- htmlTag "title" + (tag, _) <- htmlOpenTag "title" contents <- inlinesTilEnd tag spaces return contents @@ -455,7 +472,7 @@ parseTitle = try $ do -- parse header and return meta-information (for now, just title) parseHead :: GenParser Char ParserState Meta parseHead = try $ do - htmlTag "head" + htmlOpenTag "head" spaces skipMany nonTitleNonHead contents <- option [] parseTitle @@ -463,13 +480,10 @@ parseHead = try $ do htmlEndTag "head" return $ Meta contents [] [] -skipHtmlTag :: String -> GenParser Char ParserState () -skipHtmlTag tag = optional (htmlTag tag) - -- h1 class="title" representation of title in body bodyTitle :: GenParser Char ParserState [Inline] bodyTitle = try $ do - (_, attribs) <- htmlTag "h1" + (_, attribs) <- htmlOpenTag "h1" case (extractAttribute "class" attribs) of Just "title" -> return "" _ -> fail "not title" @@ -487,11 +501,11 @@ parseHtml :: GenParser Char ParserState Pandoc parseHtml = do sepEndBy (choice [xmlDec, definition, htmlComment]) spaces spaces - skipHtmlTag "html" + optional $ htmlOpenTag "html" spaces meta <- option (Meta [] [] []) parseHead spaces - skipHtmlTag "body" + optional $ htmlOpenTag "body" spaces optional bodyTitle -- skip title in body, because it's represented in meta blocks <- parseBlocks @@ -527,7 +541,7 @@ header = choice (map headerLevel (enumFromTo 1 5)) <?> "header" headerLevel :: Int -> GenParser Char ParserState Block headerLevel n = try $ do let level = "h" ++ show n - htmlTag level + htmlOpenTag level contents <- inlinesTilEnd level return $ Header n (normalizeSpaces contents) @@ -537,7 +551,7 @@ headerLevel n = try $ do hrule :: GenParser Char ParserState Block hrule = try $ do - (_, attribs) <- htmlTag "hr" + (_, attribs) <- htmlSelfClosingTag "hr" state <- getState if not (null attribs) && stateParseRaw state then unexpected "attributes in hr" -- parse as raw in this case @@ -551,7 +565,7 @@ hrule = try $ do -- skipped, because they are not portable to output formats other than HTML. codeBlock :: GenParser Char ParserState Block codeBlock = try $ do - htmlTag "pre" + htmlOpenTag "pre" result <- manyTill (many1 (satisfy (/= '<')) <|> ((anyHtmlTag <|> anyHtmlEndTag) >> return "")) @@ -572,7 +586,7 @@ codeBlock = try $ do -- blockQuote :: GenParser Char ParserState Block -blockQuote = try $ htmlTag "blockquote" >> spaces >> +blockQuote = try $ htmlOpenTag "blockquote" >> spaces >> blocksTilEnd "blockquote" >>= (return . BlockQuote) -- @@ -584,7 +598,7 @@ list = choice [ bulletList, orderedList, definitionList ] <?> "list" orderedList :: GenParser Char ParserState Block orderedList = try $ do - (_, attribs) <- htmlTag "ol" + (_, attribs) <- htmlOpenTag "ol" (start, style) <- option (1, DefaultStyle) $ do failIfStrict let sta = fromMaybe "1" $ @@ -609,7 +623,7 @@ orderedList = try $ do bulletList :: GenParser Char ParserState Block bulletList = try $ do - htmlTag "ul" + htmlOpenTag "ul" spaces -- note: if they have an <ol> or <ul> not in scope of a <li>, -- treat it as a list item, though it's not valid xhtml... @@ -620,7 +634,7 @@ bulletList = try $ do definitionList :: GenParser Char ParserState Block definitionList = try $ do failIfStrict -- def lists not part of standard markdown - htmlTag "dl" + htmlOpenTag "dl" spaces items <- sepEndBy1 definitionListItem spaces htmlEndTag "dl" @@ -638,7 +652,7 @@ definitionListItem = try $ do -- para :: GenParser Char ParserState Block -para = try $ htmlTag "p" >> inlinesTilEnd "p" >>= +para = try $ htmlOpenTag "p" >> inlinesTilEnd "p" >>= return . Para . normalizeSpaces -- @@ -672,8 +686,8 @@ inline = choice [ charRef code :: GenParser Char ParserState Inline code = try $ do - htmlTag "code" - result <- manyTill anyChar (htmlEndTag "code") + result <- (htmlOpenTag "code" >> manyTill (noneOf "<>") (htmlEndTag "code")) + <|> (htmlOpenTag "tt" >> manyTill (noneOf "<>") (htmlEndTag "tt")) -- remove internal line breaks, leading and trailing space, -- and decode character references return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $ @@ -686,7 +700,7 @@ rawHtmlInline = do if stateParseRaw state then return (HtmlInline result) else return (Str "") betweenTags :: [Char] -> GenParser Char ParserState [Inline] -betweenTags tag = try $ htmlTag tag >> inlinesTilEnd tag >>= +betweenTags tag = try $ htmlOpenTag tag >> inlinesTilEnd tag >>= return . normalizeSpaces emph :: GenParser Char ParserState Inline @@ -708,7 +722,7 @@ strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>= spanStrikeout :: GenParser Char ParserState Inline spanStrikeout = try $ do failIfStrict -- strict markdown has no strikeout, so treat as raw HTML - (_, attributes) <- htmlTag "span" + (_, attributes) <- htmlOpenTag "span" result <- case (extractAttribute "class" attributes) of Just "strikeout" -> inlinesTilEnd "span" _ -> fail "not a strikeout" @@ -719,7 +733,7 @@ whitespace = many1 space >> return Space -- hard line break linebreak :: GenParser Char ParserState Inline -linebreak = htmlTag "br" >> optional newline >> return LineBreak +linebreak = htmlSelfClosingTag "br" >> optional newline >> return LineBreak str :: GenParser Char st Inline str = many1 (noneOf "< \t\n&") >>= return . Str @@ -740,7 +754,7 @@ extractAttribute name ((attrName, contents):rest) = link :: GenParser Char ParserState Inline link = try $ do - (_, attributes) <- htmlTag "a" + (_, attributes) <- htmlOpenTag "a" url <- case (extractAttribute "href" attributes) of Just url -> return url Nothing -> fail "no href" @@ -750,7 +764,7 @@ link = try $ do image :: GenParser Char ParserState Inline image = try $ do - (_, attributes) <- htmlTag "img" + (_, attributes) <- htmlSelfClosingTag "img" url <- case (extractAttribute "src" attributes) of Just url -> return url Nothing -> fail "no src" diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 01fca9f2b..406809dfc 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -35,7 +35,8 @@ module Text.Pandoc.Readers.LaTeX ( import Text.ParserCombinators.Parsec import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Parsing import Data.Maybe ( fromMaybe ) import Data.Char ( chr ) import Data.List ( isPrefixOf, isSuffixOf ) @@ -167,16 +168,37 @@ block = choice [ hrule -- header :: GenParser Char ParserState Block -header = try $ do +header = section <|> chapter + +chapter :: GenParser Char ParserState Block +chapter = try $ do + string "\\chapter" + result <- headerWithLevel 1 + updateState $ \s -> s{ stateHasChapters = True } + return result + +section :: GenParser Char ParserState Block +section = try $ do char '\\' subs <- many (try (string "sub")) base <- try (string "section" >> return 1) <|> (string "paragraph" >> return 4) + st <- getState + let lev = if stateHasChapters st + then length subs + base + 1 + else length subs + base + headerWithLevel lev + +headerWithLevel :: Int -> GenParser Char ParserState Block +headerWithLevel lev = try $ do + spaces optional (char '*') + spaces optional $ bracketedText '[' ']' -- alt title + spaces char '{' title' <- manyTill inline (char '}') spaces - return $ Header (length subs + base) (normalizeSpaces title') + return $ Header lev (normalizeSpaces title') -- -- hrule block diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index a6d383fca..b655ea1a9 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -37,7 +37,8 @@ import Data.Ord ( comparing ) import Data.Char ( isAlphaNum ) import Data.Maybe import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Parsing import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' ) import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag, anyHtmlTag, @@ -45,7 +46,8 @@ import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, htmlBlockElement, htmlComment, unsanitaryURI ) import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.ParserCombinators.Parsec -import Control.Monad (when, liftM, unless) +import Control.Monad (when, liftM, unless, guard) +import Text.TeXMath.Macros (applyMacros, Macro, pMacroDefinition) -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ParserState -- ^ Parser state, including options for parser @@ -68,7 +70,7 @@ setextHChars = "=-" -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\\[]*_~`<>$!^-.&'\";" +specialChars = "\\[]*_~`<>$!^-.&@'\";" -- -- auxiliary functions @@ -184,7 +186,18 @@ parseMarkdown = do -- now parse it for real... (title, author, date) <- option ([],[],[]) titleBlock blocks <- parseBlocks - return $ Pandoc (Meta title author date) $ filter (/= Null) blocks + let doc = Pandoc (Meta title author date) $ filter (/= Null) blocks + -- if there are labeled examples, change references into numbers + examples <- liftM stateExamples getState + let handleExampleRef :: Inline -> Inline + handleExampleRef z@(Str ('@':xs)) = + case M.lookup xs examples of + Just n -> Str (show n) + Nothing -> z + handleExampleRef z = z + if M.null examples + then return doc + else return $ processWith handleExampleRef doc -- -- initial pass for references and notes @@ -272,6 +285,7 @@ block = do , plain , nullBlock ] else [ codeBlockDelimited + , macro , header , table , codeBlockIndented @@ -716,7 +730,7 @@ dashedLine ch = do -- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. simpleTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Char]], [Alignment], [Int]) + -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) simpleTableHeader headless = try $ do rawContent <- if headless then return "" @@ -735,7 +749,9 @@ simpleTableHeader headless = try $ do let rawHeads' = if headless then replicate (length dashes) "" else rawHeads - return (rawHeads', aligns, indices) + heads <- mapM (parseFromString (many plain)) $ + map removeLeadingTrailingSpace rawHeads' + return (heads, aligns, indices) -- Parse a table footer - dashed lines followed by blank line. tableFooter :: GenParser Char ParserState [Char] @@ -764,65 +780,27 @@ multilineRow :: [Int] -> GenParser Char ParserState [[Block]] multilineRow indices = do colLines <- many1 (rawTableLine indices) - optional blanklines let cols = map unlines $ transpose colLines mapM (parseFromString (many plain)) cols --- Calculate relative widths of table columns, based on indices -widthsFromIndices :: Int -- Number of columns on terminal - -> [Int] -- Indices - -> [Double] -- Fractional relative sizes of columns -widthsFromIndices _ [] = [] -widthsFromIndices numColumns indices = - let lengths' = zipWith (-) indices (0:indices) - lengths = reverse $ - case reverse lengths' of - [] -> [] - [x] -> [x] - -- compensate for the fact that intercolumn - -- spaces are counted in widths of all columns - -- but the last... - (x:y:zs) -> if x < y && y - x <= 2 - then y:y:zs - else x:y:zs - totLength = sum lengths - quotient = if totLength > numColumns - then fromIntegral totLength - else fromIntegral numColumns - fracs = map (\l -> (fromIntegral l) / quotient) lengths in - tail fracs - -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. tableCaption :: GenParser Char ParserState [Inline] tableCaption = try $ do skipNonindentSpaces - string "Table:" + string ":" <|> string "Table:" result <- many1 inline blanklines return $ normalizeSpaces result --- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. -tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int]) - -> ([Int] -> GenParser Char ParserState [[Block]]) - -> GenParser Char ParserState end - -> GenParser Char ParserState Block -tableWith headerParser lineParser footerParser = try $ do - (rawHeads, aligns, indices) <- headerParser - lines' <- many1Till (lineParser indices) footerParser - caption <- option [] tableCaption - heads <- mapM (parseFromString (many plain)) rawHeads - state <- getState - let numColumns = stateColumns state - let widths = widthsFromIndices numColumns indices - return $ Table caption aligns widths heads lines' - -- Parse a simple table with '---' header and one line per row. simpleTable :: Bool -- ^ Headerless table -> GenParser Char ParserState Block simpleTable headless = do Table c a _w h l <- tableWith (simpleTableHeader headless) tableLine - (if headless then tableFooter else tableFooter <|> blanklines) + (return ()) + (if headless then tableFooter else tableFooter <|> blanklines) + tableCaption -- Simple tables get 0s for relative column widths (i.e., use default) return $ Table c a (replicate (length a) 0) h l @@ -833,10 +811,10 @@ simpleTable headless = do multilineTable :: Bool -- ^ Headerless table -> GenParser Char ParserState Block multilineTable headless = - tableWith (multilineTableHeader headless) multilineRow tableFooter + tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter tableCaption multilineTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([String], [Alignment], [Int]) + -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) multilineTableHeader headless = try $ do if headless then return '\n' @@ -860,7 +838,9 @@ multilineTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else map (intercalate " ") rawHeadsList - return ((map removeLeadingTrailingSpace rawHeads), aligns, indices) + heads <- mapM (parseFromString (many plain)) $ + map removeLeadingTrailingSpace rawHeads + return (heads, aligns, indices) -- Returns an alignment type for a table, based on a list of strings -- (the rows of the column header) and a number (the length of the @@ -880,9 +860,37 @@ alignType strLst len = (True, True) -> AlignCenter (False, False) -> AlignDefault +gridTable :: Bool -- ^ Headerless table + -> GenParser Char ParserState Block +gridTable = gridTableWith block tableCaption + table :: GenParser Char ParserState Block table = multilineTable False <|> simpleTable True <|> - simpleTable False <|> multilineTable True <?> "table" + simpleTable False <|> multilineTable True <|> + gridTable False <|> gridTable True <?> "table" + +-- +-- Macros +-- + +-- | Parse a \newcommand or \renewcommand macro definition. +macro :: GenParser Char ParserState Block +macro = getState >>= guard . stateApplyMacros >> + pMacroDefinition >>= addMacro >> blanklines >> return Null + +-- | Add a macro to the list of macros in state. +addMacro :: Macro -> GenParser Char ParserState () +addMacro m = do + updateState $ \st -> st{ stateMacros = m : stateMacros st } + +-- | Apply current macros to string. +applyMacros' :: String -> GenParser Char ParserState String +applyMacros' target = do + apply <- liftM stateApplyMacros getState + if apply + then do macros <- liftM stateMacros getState + return $ applyMacros macros target + else return target -- -- inline @@ -916,6 +924,7 @@ inlineParsers = [ str , rawHtmlInline' , rawLaTeXInline' , escapedChar + , exampleRef , symbol , ltSign ] @@ -951,6 +960,14 @@ ltSign = do specialCharsMinusLt :: [Char] specialCharsMinusLt = filter (/= '<') specialChars +exampleRef :: GenParser Char ParserState Inline +exampleRef = try $ do + char '@' + lab <- many1 (alphaNum <|> oneOf "-_") + -- We just return a Str. These are replaced with numbers + -- later. See the end of parseMarkdown. + return $ Str $ '@' : lab + symbol :: GenParser Char ParserState Inline symbol = do result <- oneOf specialCharsMinusLt @@ -977,8 +994,8 @@ mathChunk = do char '\\' <|> many1 (noneOf " \t\n\\$") math :: GenParser Char ParserState Inline -math = (mathDisplay >>= return . Math DisplayMath) - <|> (mathInline >>= return . Math InlineMath) +math = (mathDisplay >>= applyMacros' >>= return . Math DisplayMath) + <|> (mathInline >>= applyMacros' >>= return . Math InlineMath) mathDisplay :: GenParser Char ParserState String mathDisplay = try $ do @@ -1285,7 +1302,7 @@ rawHtmlInline' = do st <- getState result <- if stateStrict st then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] - else anyHtmlInlineTag + else choice [htmlComment, anyHtmlInlineTag] return $ HtmlInline result #ifdef _CITEPROC diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 7b4b5eee8..13afe5053 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -31,9 +31,10 @@ module Text.Pandoc.Readers.RST ( readRST ) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Parsing import Text.ParserCombinators.Parsec -import Control.Monad ( when, unless, liftM ) +import Control.Monad ( when, unless ) import Data.List ( findIndex, intercalate, transpose, sort ) import qualified Data.Map as M import Text.Printf ( printf ) @@ -424,7 +425,7 @@ bulletListStart = try $ do -- parses ordered list start and returns its length (inc following whitespace) orderedListStart :: ListNumberStyle -> ListNumberDelim - -> GenParser Char st Int + -> GenParser Char ParserState Int orderedListStart style delim = try $ do (_, markerLen) <- withHorizDisplacement (orderedListMarker style delim) white <- many1 spaceChar @@ -607,41 +608,20 @@ dashedLine ch = do simpleDashedLines :: Char -> GenParser Char st [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -gridPart :: Char -> GenParser Char st (Int, Int) -gridPart ch = do - dashes <- many1 (char ch) - char '+' - return (length dashes, length dashes + 1) - -gridDashedLines :: Char -> GenParser Char st [(Int,Int)] -gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline - -- Parse a table row separator simpleTableSep :: Char -> GenParser Char ParserState Char simpleTableSep ch = try $ simpleDashedLines ch >> newline -gridTableSep :: Char -> GenParser Char ParserState Char -gridTableSep ch = try $ gridDashedLines ch >> return '\n' - -- Parse a table footer simpleTableFooter :: GenParser Char ParserState [Char] simpleTableFooter = try $ simpleTableSep '=' >> blanklines -gridTableFooter :: GenParser Char ParserState [Char] -gridTableFooter = blanklines - -- Parse a raw line and split it into chunks by indices. simpleTableRawLine :: [Int] -> GenParser Char ParserState [String] simpleTableRawLine indices = do line <- many1Till anyChar newline return (simpleTableSplitLine indices line) -gridTableRawLine :: [Int] -> GenParser Char ParserState [String] -gridTableRawLine indices = do - char '|' - line <- many1Till anyChar newline - return (gridTableSplitLine indices $ removeTrailingSpace line) - -- Parse a table row and return a list of blocks (columns). simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]] simpleTableRow indices = do @@ -651,64 +631,13 @@ simpleTableRow indices = do let cols = map unlines . transpose $ firstLine : colLines mapM (parseFromString (many plain)) cols -gridTableRow :: [Int] - -> GenParser Char ParserState [[Block]] -gridTableRow indices = do - colLines <- many1 (gridTableRawLine indices) - let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ - transpose colLines - mapM (liftM compactifyCell . parseFromString (many block)) cols - -compactifyCell :: [Block] -> [Block] -compactifyCell bs = head $ compactify [bs] - simpleTableSplitLine :: [Int] -> String -> [String] simpleTableSplitLine indices line = map removeLeadingTrailingSpace $ tail $ splitByIndices (init indices) line -gridTableSplitLine :: [Int] -> String -> [String] -gridTableSplitLine indices line = - map removeFinalBar $ tail $ splitByIndices (init indices) line - -removeFinalBar :: String -> String -removeFinalBar = reverse . dropWhile (=='|') . dropWhile (`elem` " \t") . - reverse - -removeOneLeadingSpace :: [String] -> [String] -removeOneLeadingSpace xs = - if all startsWithSpace xs - then map (drop 1) xs - else xs - where startsWithSpace "" = True - startsWithSpace (y:_) = y == ' ' - --- Calculate relative widths of table columns, based on indices -widthsFromIndices :: Int -- Number of columns on terminal - -> [Int] -- Indices - -> [Double] -- Fractional relative sizes of columns -widthsFromIndices _ [] = [] -widthsFromIndices numColumns indices = - let lengths' = zipWith (-) indices (0:indices) - lengths = reverse $ - case reverse lengths' of - [] -> [] - [x] -> [x] - -- compensate for the fact that intercolumn - -- spaces are counted in widths of all columns - -- but the last... - (x:y:zs) -> if x < y && y - x <= 2 - then y:y:zs - else x:y:zs - totLength = sum lengths - quotient = if totLength > numColumns - then fromIntegral totLength - else fromIntegral numColumns - fracs = map (\l -> (fromIntegral l) / quotient) lengths in - tail fracs - simpleTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Char]], [Alignment], [Int]) + -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) simpleTableHeader headless = try $ do optional blanklines rawContent <- if headless @@ -722,64 +651,23 @@ simpleTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else simpleTableSplitLine indices rawContent - return (rawHeads, aligns, indices) + heads <- mapM (parseFromString (many plain)) $ + map removeLeadingTrailingSpace rawHeads + return (heads, aligns, indices) -gridTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([String], [Alignment], [Int]) -gridTableHeader headless = try $ do - optional blanklines - dashes <- gridDashedLines '-' - rawContent <- if headless - then return $ repeat "" - else many1 - (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) - if headless - then return () - else gridTableSep '=' >> return () - let lines' = map snd dashes - let indices = scanl (+) 0 lines' - let aligns = replicate (length lines') AlignDefault -- RST does not have a notion of alignments - let rawHeads = if headless - then replicate (length dashes) "" - else map (intercalate " ") $ transpose - $ map (gridTableSplitLine indices) rawContent - return (rawHeads, aligns, indices) - --- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. -tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int]) - -> ([Int] -> GenParser Char ParserState [[Block]]) - -> GenParser Char ParserState sep - -> GenParser Char ParserState end - -> GenParser Char ParserState Block -tableWith headerParser rowParser lineParser footerParser = try $ do - (rawHeads, aligns, indices) <- headerParser - lines' <- rowParser indices `sepEndBy` lineParser - footerParser - heads <- mapM (parseFromString (many plain)) rawHeads - state <- getState - let captions = [] -- no notion of captions in RST - let numColumns = stateColumns state - let widths = widthsFromIndices numColumns indices - return $ Table captions aligns widths heads lines' - --- Parse a simple table with '---' header and one line per row. +-- Parse a simple table. simpleTable :: Bool -- ^ Headerless table -> GenParser Char ParserState Block simpleTable headless = do - Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter + Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return []) -- Simple tables get 0s for relative column widths (i.e., use default) return $ Table c a (replicate (length a) 0) h l where sep = return () -- optional (simpleTableSep '-') --- Parse a grid table: starts with row of '-' on top, then header --- (which may be grid), then the rows, --- which may be grid, separated by blank lines, and --- ending with a footer (dashed line followed by blank line). gridTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block -gridTable headless = - tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter + -> GenParser Char ParserState Block +gridTable = gridTableWith block (return []) table :: GenParser Char ParserState Block table = gridTable False <|> simpleTable False <|> diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index 40cf39987..ca839dd08 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -86,6 +86,28 @@ expToInlines (ESubsup x y z) = do y' <- expToInlines y z' <- expToInlines z return $ x' ++ [Subscript y'] ++ [Superscript z'] -expToInlines (EText _ x) = Just [Emph [Str x]] +expToInlines (EDown x y) = expToInlines (ESub x y) +expToInlines (EUp x y) = expToInlines (ESuper x y) +expToInlines (EDownup x y z) = expToInlines (ESubsup x y z) +expToInlines (EText "normal" x) = Just [Str x] +expToInlines (EText "bold" x) = Just [Strong [Str x]] +expToInlines (EText "monospace" x) = Just [Code x] +expToInlines (EText "italic" x) = Just [Emph [Str x]] +expToInlines (EText _ x) = Just [Str x] +expToInlines (EOver (EGrouped [EIdentifier [c]]) (ESymbol Accent [accent])) = + case accent of + '\x203E' -> Just [Emph [Str [c,'\x0304']]] -- bar + '\x00B4' -> Just [Emph [Str [c,'\x0301']]] -- acute + '\x0060' -> Just [Emph [Str [c,'\x0300']]] -- grave + '\x02D8' -> Just [Emph [Str [c,'\x0306']]] -- breve + '\x02C7' -> Just [Emph [Str [c,'\x030C']]] -- check + '.' -> Just [Emph [Str [c,'\x0307']]] -- dot + '\x00B0' -> Just [Emph [Str [c,'\x030A']]] -- ring + '\x20D7' -> Just [Emph [Str [c,'\x20D7']]] -- arrow right + '\x20D6' -> Just [Emph [Str [c,'\x20D6']]] -- arrow left + '\x005E' -> Just [Emph [Str [c,'\x0302']]] -- hat + '\x0302' -> Just [Emph [Str [c,'\x0302']]] -- hat + '~' -> Just [Emph [Str [c,'\x0303']]] -- tilde + _ -> Nothing expToInlines _ = Nothing diff --git a/src/Text/Pandoc/S5.hs b/src/Text/Pandoc/S5.hs new file mode 100644 index 000000000..1567a3ede --- /dev/null +++ b/src/Text/Pandoc/S5.hs @@ -0,0 +1,57 @@ +{- +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.S5 + Copyright : Copyright (C) 2006-2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Definitions for creation of S5 powerpoint-like HTML. +(See <http://meyerweb.com/eric/tools/s5/>.) +-} +module Text.Pandoc.S5 ( s5HeaderIncludes) where +import Text.Pandoc.Shared ( readDataFile ) +import System.FilePath ( (</>) ) + +s5HeaderIncludes :: Maybe FilePath -> IO String +s5HeaderIncludes datadir = do + c <- s5CSS datadir + j <- s5Javascript datadir + return $ c ++ j + +s5Javascript :: Maybe FilePath -> IO String +s5Javascript datadir = do + jsCom <- readDataFile datadir $ "s5" </> "default" </> "slides.js.comment" + jsPacked <- readDataFile datadir $ "s5" </> "default" </> "slides.js.packed" + return $ "<script type=\"text/javascript\">\n" ++ jsCom ++ jsPacked ++ + "</script>\n" + +s5CSS :: Maybe FilePath -> IO String +s5CSS datadir = do + s5CoreCSS <- readDataFile datadir $ "s5" </> "default" </> "s5-core.css" + s5FramingCSS <- readDataFile datadir $ "s5" </> "default" </> "framing.css" + s5PrettyCSS <- readDataFile datadir $ "s5" </> "default" </> "pretty.css" + s5OperaCSS <- readDataFile datadir $ "s5" </> "default" </> "opera.css" + s5OutlineCSS <- readDataFile datadir $ "s5" </> "default" </> "outline.css" + s5PrintCSS <- readDataFile datadir $ "s5" </> "default" </> "print.css" + return $ "<style type=\"text/css\" media=\"projection\" id=\"slideProj\">\n" ++ s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS ++ "\n</style>\n<style type=\"text/css\" media=\"projection\" id=\"operaFix\">\n" ++ s5OperaCSS ++ "\n</style>\n<style type=\"text/css\" media=\"screen\" id=\"outlineStyle\">\n" ++ s5OutlineCSS ++ "\n</style>\n<style type=\"text/css\" media=\"print\" id=\"slidePrint\">\n" ++ s5PrintCSS ++ "\n</style>\n" + diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 88eccb96c..0fdaf42f3 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -45,55 +45,15 @@ module Text.Pandoc.Shared ( toRomanNumeral, escapeURI, unescapeURI, + tabFilter, + -- * Prettyprinting wrapped, wrapIfNeeded, wrappedTeX, wrapTeXIfNeeded, BlockWrapper (..), wrappedBlocksToDoc, - tabFilter, - -- * Parsing - (>>~), - anyLine, - many1Till, - notFollowedBy', - oneOfStrings, - spaceChar, - skipSpaces, - blankline, - blanklines, - enclosed, - stringAnyCase, - parseFromString, - lineClump, - charsInBalanced, - charsInBalanced', - romanNumeral, - emailAddress, - uri, - withHorizDisplacement, - nullBlock, - failIfStrict, - failUnlessLHS, - escaped, - anyOrderedListMarker, - orderedListMarker, - charRef, - readWith, - testStringWith, - ParserState (..), - defaultParserState, - HeaderType (..), - ParserContext (..), - QuoteContext (..), - NoteTable, - KeyTable, - Key (..), - lookupKeySrc, - refsMatch, - -- * Prettyprinting hang', - prettyPandoc, -- * Pandoc block and inline list processing orderedListMarkers, normalizeSpaces, @@ -102,9 +62,11 @@ module Text.Pandoc.Shared ( hierarchicalize, uniqueIdent, isHeaderBlock, + headerShift, -- * Writer options HTMLMathMethod (..), ObfuscationMethod (..), + HTMLSlideVariant (..), WriterOptions (..), defaultWriterOptions, -- * File handling @@ -113,22 +75,18 @@ module Text.Pandoc.Shared ( ) where import Text.Pandoc.Definition -import qualified Text.Pandoc.UTF8 as UTF8 (readFile, putStrLn) -import Text.ParserCombinators.Parsec +import qualified Text.Pandoc.UTF8 as UTF8 (readFile) import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest ) import qualified Text.PrettyPrint.HughesPJ as PP -import Text.Pandoc.CharacterReferences ( characterReference ) -import Data.Char ( toLower, toUpper, ord, isLower, isUpper, isAlpha, isAscii, +import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii, isLetter, isDigit ) import Data.List ( find, isPrefixOf, intercalate ) -import Network.URI ( parseURI, URI (..), isAllowedInURI, escapeURIString, unEscapeString ) +import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString ) import Codec.Binary.UTF8.String ( encodeString, decodeString ) import System.Directory import System.FilePath ( (</>) ) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S -import Control.Monad (join) -import qualified Data.Map as M import Paths_pandoc (getDataFileName) -- @@ -153,11 +111,11 @@ splitByIndices (x:xs) lst = -- | Replace each occurrence of one sublist in a list with another. substitute :: (Eq a) => [a] -> [a] -> [a] -> [a] substitute _ _ [] = [] -substitute [] _ lst = lst -substitute target replacement lst = +substitute [] _ xs = xs +substitute target replacement lst@(x:xs) = if target `isPrefixOf` lst - then replacement ++ (substitute target replacement $ drop (length target) lst) - else (head lst):(substitute target replacement $ tail lst) + then replacement ++ substitute target replacement (drop (length target) lst) + else x : substitute target replacement xs -- -- Text processing @@ -239,6 +197,30 @@ unescapeURI :: String -> String unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) . decodeString . unEscapeString +-- | Convert tabs to spaces and filter out DOS line endings. +-- Tabs will be preserved if tab stop is set to 0. +tabFilter :: Int -- ^ Tab stop + -> String -- ^ Input + -> String +tabFilter tabStop = + let go _ [] = "" + go _ ('\n':xs) = '\n' : go tabStop xs + go _ ('\r':'\n':xs) = '\n' : go tabStop xs + go _ ('\r':xs) = '\n' : go tabStop xs + go spsToNextStop ('\t':xs) = + if tabStop == 0 + then '\t' : go tabStop xs + else replicate spsToNextStop ' ' ++ go tabStop xs + go 1 (x:xs) = + x : go tabStop xs + go spsToNextStop (x:xs) = + x : go (spsToNextStop - 1) xs + in go tabStop + +-- +-- Prettyprinting +-- + -- | Wrap inlines to line length. wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>= @@ -308,546 +290,10 @@ wrappedBlocksToDoc = foldr addBlock empty addBlock (Pad d) accum = d $$ text "" $$ accum addBlock (Reg d) accum = d $$ accum --- | Convert tabs to spaces and filter out DOS line endings. --- Tabs will be preserved if tab stop is set to 0. -tabFilter :: Int -- ^ Tab stop - -> String -- ^ Input - -> String -tabFilter tabStop = - let go _ [] = "" - go _ ('\n':xs) = '\n' : go tabStop xs - go _ ('\r':'\n':xs) = '\n' : go tabStop xs - go _ ('\r':xs) = '\n' : go tabStop xs - go spsToNextStop ('\t':xs) = - if tabStop == 0 - then '\t' : go tabStop xs - else replicate spsToNextStop ' ' ++ go tabStop xs - go 1 (x:xs) = - x : go tabStop xs - go spsToNextStop (x:xs) = - x : go (spsToNextStop - 1) xs - in go tabStop - --- --- Parsing --- - --- | Like >>, but returns the operation on the left. --- (Suggested by Tillmann Rendel on Haskell-cafe list.) -(>>~) :: (Monad m) => m a -> m b -> m a -a >>~ b = a >>= \x -> b >> return x - --- | Parse any line of text -anyLine :: GenParser Char st [Char] -anyLine = manyTill anyChar newline - --- | Like @manyTill@, but reads at least one item. -many1Till :: GenParser tok st a - -> GenParser tok st end - -> GenParser tok st [a] -many1Till p end = do - first <- p - rest <- manyTill p end - return (first:rest) - --- | A more general form of @notFollowedBy@. This one allows any --- type of parser to be specified, and succeeds only if that parser fails. --- It does not consume any input. -notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () -notFollowedBy' p = try $ join $ do a <- try p - return (unexpected (show a)) - <|> - return (return ()) --- (This version due to Andrew Pimlott on the Haskell mailing list.) - --- | Parses one of a list of strings (tried in order). -oneOfStrings :: [String] -> GenParser Char st String -oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings - --- | Parses a space or tab. -spaceChar :: CharParser st Char -spaceChar = char ' ' <|> char '\t' - --- | Skips zero or more spaces or tabs. -skipSpaces :: GenParser Char st () -skipSpaces = skipMany spaceChar - --- | Skips zero or more spaces or tabs, then reads a newline. -blankline :: GenParser Char st Char -blankline = try $ skipSpaces >> newline - --- | Parses one or more blank lines and returns a string of newlines. -blanklines :: GenParser Char st [Char] -blanklines = many1 blankline - --- | Parses material enclosed between start and end parsers. -enclosed :: GenParser Char st t -- ^ start parser - -> GenParser Char st end -- ^ end parser - -> GenParser Char st a -- ^ content parser (to be used repeatedly) - -> GenParser Char st [a] -enclosed start end parser = try $ - start >> notFollowedBy space >> many1Till parser end - --- | Parse string, case insensitive. -stringAnyCase :: [Char] -> CharParser st String -stringAnyCase [] = string "" -stringAnyCase (x:xs) = do - firstChar <- char (toUpper x) <|> char (toLower x) - rest <- stringAnyCase xs - return (firstChar:rest) - --- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a -parseFromString parser str = do - oldPos <- getPosition - oldInput <- getInput - setInput str - result <- parser - setInput oldInput - setPosition oldPos - return result - --- | Parse raw line block up to and including blank lines. -lineClump :: GenParser Char st String -lineClump = blanklines - <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) - --- | Parse a string of characters between an open character --- and a close character, including text between balanced --- pairs of open and close, which must be different. For example, --- @charsInBalanced '(' ')'@ will parse "(hello (there))" --- and return "hello (there)". Stop if a blank line is --- encountered. -charsInBalanced :: Char -> Char -> GenParser Char st String -charsInBalanced open close = try $ do - char open - raw <- many $ (many1 (noneOf [open, close, '\n'])) - <|> (do res <- charsInBalanced open close - return $ [open] ++ res ++ [close]) - <|> try (string "\n" >>~ notFollowedBy' blanklines) - char close - return $ concat raw - --- | Like @charsInBalanced@, but allow blank lines in the content. -charsInBalanced' :: Char -> Char -> GenParser Char st String -charsInBalanced' open close = try $ do - char open - raw <- many $ (many1 (noneOf [open, close])) - <|> (do res <- charsInBalanced' open close - return $ [open] ++ res ++ [close]) - char close - return $ concat raw - --- Auxiliary functions for romanNumeral: - -lowercaseRomanDigits :: [Char] -lowercaseRomanDigits = ['i','v','x','l','c','d','m'] - -uppercaseRomanDigits :: [Char] -uppercaseRomanDigits = map toUpper lowercaseRomanDigits - --- | Parses a roman numeral (uppercase or lowercase), returns number. -romanNumeral :: Bool -- ^ Uppercase if true - -> GenParser Char st Int -romanNumeral upperCase = do - let romanDigits = if upperCase - then uppercaseRomanDigits - else lowercaseRomanDigits - lookAhead $ oneOf romanDigits - let [one, five, ten, fifty, hundred, fivehundred, thousand] = - map char romanDigits - thousands <- many thousand >>= (return . (1000 *) . length) - ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 - fivehundreds <- many fivehundred >>= (return . (500 *) . length) - fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400 - hundreds <- many hundred >>= (return . (100 *) . length) - nineties <- option 0 $ try $ ten >> hundred >> return 90 - fifties <- many fifty >>= (return . (50 *) . length) - forties <- option 0 $ try $ ten >> fifty >> return 40 - tens <- many ten >>= (return . (10 *) . length) - nines <- option 0 $ try $ one >> ten >> return 9 - fives <- many five >>= (return . (5 *) . length) - fours <- option 0 $ try $ one >> five >> return 4 - ones <- many one >>= (return . length) - let total = thousands + ninehundreds + fivehundreds + fourhundreds + - hundreds + nineties + fifties + forties + tens + nines + - fives + fours + ones - if total == 0 - then fail "not a roman numeral" - else return total - --- Parsers for email addresses and URIs - -emailChar :: GenParser Char st Char -emailChar = alphaNum <|> oneOf "-+_." - -domainChar :: GenParser Char st Char -domainChar = alphaNum <|> char '-' - -domain :: GenParser Char st [Char] -domain = do - first <- many1 domainChar - dom <- many1 $ try (char '.' >> many1 domainChar ) - return $ intercalate "." (first:dom) - --- | Parses an email address; returns original and corresponding --- escaped mailto: URI. -emailAddress :: GenParser Char st (String, String) -emailAddress = try $ do - firstLetter <- alphaNum - restAddr <- many emailChar - let addr = firstLetter:restAddr - char '@' - dom <- domain - let full = addr ++ '@':dom - return (full, escapeURI $ "mailto:" ++ full) - --- | Parses a URI. Returns pair of original and URI-escaped version. -uri :: GenParser Char st (String, String) -uri = try $ do - let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:", - "news:", "telnet:" ] - lookAhead $ oneOfStrings protocols - -- scan non-ascii characters and ascii characters allowed in a URI - str <- many1 $ satisfy (\c -> not (isAscii c) || isAllowedInURI c) - -- now see if they amount to an absolute URI - case parseURI (escapeURI str) of - Just uri' -> if uriScheme uri' `elem` protocols - then return (str, show uri') - else fail "not a URI" - Nothing -> fail "not a URI" - --- | Applies a parser, returns tuple of its results and its horizontal --- displacement (the difference between the source column at the end --- and the source column at the beginning). Vertical displacement --- (source row) is ignored. -withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply - -> GenParser Char st (a, Int) -- ^ (result, displacement) -withHorizDisplacement parser = do - pos1 <- getPosition - result <- parser - pos2 <- getPosition - return (result, sourceColumn pos2 - sourceColumn pos1) - --- | Parses a character and returns 'Null' (so that the parser can move on --- if it gets stuck). -nullBlock :: GenParser Char st Block -nullBlock = anyChar >> return Null - --- | Fail if reader is in strict markdown syntax mode. -failIfStrict :: GenParser Char ParserState () -failIfStrict = do - state <- getState - if stateStrict state then fail "strict mode" else return () - --- | Fail unless we're in literate haskell mode. -failUnlessLHS :: GenParser tok ParserState () -failUnlessLHS = do - state <- getState - if stateLiterateHaskell state then return () else fail "Literate haskell feature" - --- | Parses backslash, then applies character parser. -escaped :: GenParser Char st Char -- ^ Parser for character to escape - -> GenParser Char st Inline -escaped parser = try $ do - char '\\' - result <- parser - return (Str [result]) - --- | Parses an uppercase roman numeral and returns (UpperRoman, number). -upperRoman :: GenParser Char st (ListNumberStyle, Int) -upperRoman = do - num <- romanNumeral True - return (UpperRoman, num) - --- | Parses a lowercase roman numeral and returns (LowerRoman, number). -lowerRoman :: GenParser Char st (ListNumberStyle, Int) -lowerRoman = do - num <- romanNumeral False - return (LowerRoman, num) - --- | Parses a decimal numeral and returns (Decimal, number). -decimal :: GenParser Char st (ListNumberStyle, Int) -decimal = do - num <- many1 digit - return (Decimal, read num) - --- | Parses a '#' returns (DefaultStyle, 1). -defaultNum :: GenParser Char st (ListNumberStyle, Int) -defaultNum = do - char '#' - return (DefaultStyle, 1) - --- | Parses a lowercase letter and returns (LowerAlpha, number). -lowerAlpha :: GenParser Char st (ListNumberStyle, Int) -lowerAlpha = do - ch <- oneOf ['a'..'z'] - return (LowerAlpha, ord ch - ord 'a' + 1) - --- | Parses an uppercase letter and returns (UpperAlpha, number). -upperAlpha :: GenParser Char st (ListNumberStyle, Int) -upperAlpha = do - ch <- oneOf ['A'..'Z'] - return (UpperAlpha, ord ch - ord 'A' + 1) - --- | Parses a roman numeral i or I -romanOne :: GenParser Char st (ListNumberStyle, Int) -romanOne = (char 'i' >> return (LowerRoman, 1)) <|> - (char 'I' >> return (UpperRoman, 1)) - --- | Parses an ordered list marker and returns list attributes. -anyOrderedListMarker :: GenParser Char st ListAttributes -anyOrderedListMarker = choice $ - [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], - numParser <- [decimal, defaultNum, romanOne, - lowerAlpha, lowerRoman, upperAlpha, upperRoman]] - --- | Parses a list number (num) followed by a period, returns list attributes. -inPeriod :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes -inPeriod num = try $ do - (style, start) <- num - char '.' - let delim = if style == DefaultStyle - then DefaultDelim - else Period - return (start, style, delim) - --- | Parses a list number (num) followed by a paren, returns list attributes. -inOneParen :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes -inOneParen num = try $ do - (style, start) <- num - char ')' - return (start, style, OneParen) - --- | Parses a list number (num) enclosed in parens, returns list attributes. -inTwoParens :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes -inTwoParens num = try $ do - char '(' - (style, start) <- num - char ')' - return (start, style, TwoParens) - --- | Parses an ordered list marker with a given style and delimiter, --- returns number. -orderedListMarker :: ListNumberStyle - -> ListNumberDelim - -> GenParser Char st Int -orderedListMarker style delim = do - let num = defaultNum <|> -- # can continue any kind of list - case style of - DefaultStyle -> decimal - Decimal -> decimal - UpperRoman -> upperRoman - LowerRoman -> lowerRoman - UpperAlpha -> upperAlpha - LowerAlpha -> lowerAlpha - let context = case delim of - DefaultDelim -> inPeriod - Period -> inPeriod - OneParen -> inOneParen - TwoParens -> inTwoParens - (start, _, _) <- context num - return start - --- | Parses a character reference and returns a Str element. -charRef :: GenParser Char st Inline -charRef = do - c <- characterReference - return $ Str [c] - --- | Parse a string with a given parser and state. -readWith :: GenParser Char ParserState a -- ^ parser - -> ParserState -- ^ initial state - -> String -- ^ input string - -> a -readWith parser state input = - case runParser parser state "source" input of - Left err -> error $ "\nError:\n" ++ show err - Right result -> result - --- | Parse a string with @parser@ (for testing). -testStringWith :: (Show a) => GenParser Char ParserState a - -> String - -> IO () -testStringWith parser str = UTF8.putStrLn $ show $ - readWith parser defaultParserState str - --- | Parsing options. -data ParserState = ParserState - { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? - stateParserContext :: ParserContext, -- ^ Inside list? - stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? - stateSanitizeHTML :: Bool, -- ^ Sanitize HTML? - stateKeys :: KeyTable, -- ^ List of reference keys -#ifdef _CITEPROC - stateCitations :: [String], -- ^ List of available citations -#endif - stateNotes :: NoteTable, -- ^ List of notes - stateTabStop :: Int, -- ^ Tab stop - stateStandalone :: Bool, -- ^ Parse bibliographic info? - stateTitle :: [Inline], -- ^ Title of document - stateAuthors :: [[Inline]], -- ^ Authors of document - stateDate :: [Inline], -- ^ Date of document - stateStrict :: Bool, -- ^ Use strict markdown syntax? - stateSmart :: Bool, -- ^ Use smart typography? - stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell - stateColumns :: Int, -- ^ Number of columns in terminal - stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used - stateIndentedCodeClasses :: [String] -- ^ Classes to use for indented code blocks - } - deriving Show - -defaultParserState :: ParserState -defaultParserState = - ParserState { stateParseRaw = False, - stateParserContext = NullState, - stateQuoteContext = NoQuote, - stateSanitizeHTML = False, - stateKeys = M.empty, -#ifdef _CITEPROC - stateCitations = [], -#endif - stateNotes = [], - stateTabStop = 4, - stateStandalone = False, - stateTitle = [], - stateAuthors = [], - stateDate = [], - stateStrict = False, - stateSmart = False, - stateLiterateHaskell = False, - stateColumns = 80, - stateHeaderTable = [], - stateIndentedCodeClasses = [] } - -data HeaderType - = SingleHeader Char -- ^ Single line of characters underneath - | DoubleHeader Char -- ^ Lines of characters above and below - deriving (Eq, Show) - -data ParserContext - = ListItemState -- ^ Used when running parser on list item contents - | NullState -- ^ Default state - deriving (Eq, Show) - -data QuoteContext - = InSingleQuote -- ^ Used when parsing inside single quotes - | InDoubleQuote -- ^ Used when parsing inside double quotes - | NoQuote -- ^ Used when not parsing inside quotes - deriving (Eq, Show) - -type NoteTable = [(String, String)] - -newtype Key = Key [Inline] deriving (Show, Read) - -instance Eq Key where - Key a == Key b = refsMatch a b - -instance Ord Key where - compare (Key a) (Key b) = if a == b then EQ else compare a b - -type KeyTable = M.Map Key Target - --- | Look up key in key table and return target object. -lookupKeySrc :: KeyTable -- ^ Key table - -> Key -- ^ Key - -> Maybe Target -lookupKeySrc table key = case M.lookup key table of - Nothing -> Nothing - Just src -> Just src - --- | Returns @True@ if keys match (case insensitive). -refsMatch :: [Inline] -> [Inline] -> Bool -refsMatch ((Str x):restx) ((Str y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((Emph x):restx) ((Emph y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Strong x):restx) ((Strong y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Strikeout x):restx) ((Strikeout y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Superscript x):restx) ((Superscript y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Subscript x):restx) ((Subscript y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((SmallCaps x):restx) ((SmallCaps y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Quoted t x):restx) ((Quoted u y):resty) = - t == u && refsMatch x y && refsMatch restx resty -refsMatch ((Code x):restx) ((Code y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((Math t x):restx) ((Math u y):resty) = - ((map toLower x) == (map toLower y)) && t == u && refsMatch restx resty -refsMatch ((TeX x):restx) ((TeX y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty -refsMatch [] x = null x -refsMatch x [] = null x - --- --- Prettyprinting --- - -- | A version of hang that works like the version in pretty-1.0.0.0 hang' :: Doc -> Int -> Doc -> Doc hang' d1 n d2 = d1 $$ (nest n d2) --- | Indent string as a block. -indentBy :: Int -- ^ Number of spaces to indent the block - -> Int -- ^ Number of spaces (rel to block) to indent first line - -> String -- ^ Contents of block to indent - -> String -indentBy _ _ [] = "" -indentBy num first str = - let (firstLine:restLines) = lines str - firstLineIndent = num + first - in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ - (intercalate "\n" $ map ((replicate num ' ') ++ ) restLines) - --- | Prettyprint list of Pandoc blocks elements. -prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks - -> [Block] -- ^ List of blocks - -> String -prettyBlockList indent [] = indentBy indent 0 "[]" -prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ - (intercalate "\n, " (map prettyBlock blocks)) ++ " ]" - --- | Prettyprint Pandoc block element. -prettyBlock :: Block -> String -prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ - (prettyBlockList 2 blocks) -prettyBlock (OrderedList attribs blockLists) = - "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++ - (intercalate ", " $ map (\blocks -> prettyBlockList 2 blocks) - blockLists)) ++ " ]" -prettyBlock (BulletList blockLists) = "BulletList\n" ++ - indentBy 2 0 ("[ " ++ (intercalate ", " - (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" -prettyBlock (DefinitionList items) = "DefinitionList\n" ++ - indentBy 2 0 ("[ " ++ (intercalate "\n, " - (map (\(term, defs) -> "(" ++ show term ++ ",\n" ++ - indentBy 3 0 ("[ " ++ (intercalate ", " - (map (\blocks -> prettyBlockList 2 blocks) defs)) ++ "]") ++ - ")") items))) ++ " ]" -prettyBlock (Table caption aligns widths header rows) = - "Table " ++ show caption ++ " " ++ show aligns ++ " " ++ - show widths ++ "\n" ++ prettyRow header ++ " [\n" ++ - (intercalate ",\n" (map prettyRow rows)) ++ " ]" - where prettyRow cols = indentBy 2 0 ("[ " ++ (intercalate ", " - (map (\blocks -> prettyBlockList 2 blocks) - cols))) ++ " ]" -prettyBlock block = show block - --- | Prettyprint Pandoc document. -prettyPandoc :: Pandoc -> String -prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++ - ")\n" ++ (prettyBlockList 0 blocks) ++ "\n" - -- -- Pandoc block and inline list processing -- @@ -859,6 +305,7 @@ orderedListMarkers (start, numstyle, numdelim) = let singleton c = [c] nums = case numstyle of DefaultStyle -> map show [start..] + Example -> map show [start..] Decimal -> map show [start..] UpperAlpha -> drop (start - 1) $ cycle $ map singleton ['A'..'Z'] @@ -920,11 +367,11 @@ data Element = Blk Block -- | Convert Pandoc inline list to plain text identifier. HTML -- identifiers must start with a letter, and may contain only --- letters, digits, and the characters _-:. +-- letters, digits, and the characters _-. inlineListToIdentifier :: [Inline] -> String inlineListToIdentifier = dropWhile (not . isAlpha) . intercalate "-" . words . map toLower . - filter (\c -> isLetter c || isDigit c || c `elem` "_-:. ") . + filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") . concatMap extractText where extractText x = case x of Str s -> s @@ -995,6 +442,13 @@ isHeaderBlock :: Block -> Bool isHeaderBlock (Header _ _) = True isHeaderBlock _ = False +-- | Shift header levels up or down. +headerShift :: Int -> Pandoc -> Pandoc +headerShift n = processWith shift + where shift :: Block -> Block + shift (Header level inner) = Header (level + n) inner + shift x = x + -- -- Writer options -- @@ -1003,8 +457,9 @@ data HTMLMathMethod = PlainMath | LaTeXMathML (Maybe String) -- url of LaTeXMathML.js | JsMath (Maybe String) -- url of jsMath load script | GladTeX - | MimeTeX String -- url of mimetex.cgi + | WebTeX String -- url of TeX->image script. | MathML (Maybe String) -- url of MathMLinHTML.js + | MathJax String -- url of MathJax.js deriving (Show, Read, Eq) -- | Methods for obfuscating email addresses in HTML. @@ -1013,27 +468,35 @@ data ObfuscationMethod = NoObfuscation | JavascriptObfuscation deriving (Show, Read, Eq) +-- | Varieties of HTML slide shows. +data HTMLSlideVariant = S5Slides + | SlidySlides + | NoSlides + deriving (Show, Read, Eq) + -- | Options for writers data WriterOptions = WriterOptions { writerStandalone :: Bool -- ^ Include header and footer , writerTemplate :: String -- ^ Template to use in standalone mode , writerVariables :: [(String, String)] -- ^ Variables to set in template - , writerIncludeBefore :: String -- ^ Text to include before the body - , writerIncludeAfter :: String -- ^ Text to include after the body + , writerEPUBMetadata :: String -- ^ Metadata to include in EPUB , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs , writerTableOfContents :: Bool -- ^ Include table of contents - , writerS5 :: Bool -- ^ We're writing S5 + , writerSlideVariant :: HTMLSlideVariant -- ^ Are we writing S5 or Slidy? + , writerIncremental :: Bool -- ^ True if lists should be incremental , writerXeTeX :: Bool -- ^ Create latex suitable for use by xetex , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc) - , writerIncremental :: Bool -- ^ Incremental S5 lists , writerNumberSections :: Bool -- ^ Number sections in LaTeX + , writerSectionDivs :: Bool -- ^ Put sections in div tags in HTML , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , writerWrapText :: Bool -- ^ Wrap text to line length , writerLiterateHaskell :: Bool -- ^ Write as literate haskell , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML + , writerSourceDirectory :: FilePath -- ^ Directory path of 1st source file + , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory } deriving Show -- | Default writer options. @@ -1042,22 +505,24 @@ defaultWriterOptions = WriterOptions { writerStandalone = False , writerTemplate = "" , writerVariables = [] - , writerIncludeBefore = "" - , writerIncludeAfter = "" + , writerEPUBMetadata = "" , writerTabStop = 4 , writerTableOfContents = False - , writerS5 = False + , writerSlideVariant = NoSlides + , writerIncremental = False , writerXeTeX = False , writerHTMLMathMethod = PlainMath , writerIgnoreNotes = False - , writerIncremental = False , writerNumberSections = False + , writerSectionDivs = True , writerStrictMarkdown = False , writerReferenceLinks = False , writerWrapText = True , writerLiterateHaskell = False , writerEmailObfuscation = JavascriptObfuscation , writerIdentifierPrefix = "" + , writerSourceDirectory = "." + , writerUserDataDir = Nothing } -- diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 2238f4da8..c8ddc3abf 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -83,7 +83,6 @@ getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first -> String -- ^ Name of writer -> IO (Either E.IOException String) getDefaultTemplate _ "native" = return $ Right "" -getDefaultTemplate user "s5" = getDefaultTemplate user "html" getDefaultTemplate user "odt" = getDefaultTemplate user "opendocument" getDefaultTemplate user writer = do let format = takeWhile (/='+') writer -- strip off "+lhs" if present @@ -173,7 +172,7 @@ for = try $ do string "$for(" id' <- ident string ")$" - -- if newline after the "if", then a newline after "endif" will be swallowed + -- if newline after the "for", then a newline after "endfor" will be swallowed multiline <- option False $ try $ skipEndline >> return True let matches = filter (\(k,_) -> k == id') vars let indent = replicate pos ' ' diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 3dd61176c..eba79c734 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -37,8 +37,9 @@ module Text.Pandoc.UTF8 ( readFile ) where -import qualified Data.ByteString.Lazy as B -import Data.ByteString.Lazy.UTF8 (toString, fromString) +import qualified Data.ByteString as B +import Codec.Binary.UTF8.String (encodeString) +import Data.ByteString.UTF8 (toString, fromString) import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn) import System.IO (Handle) import Control.Monad (liftM) @@ -51,10 +52,10 @@ stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s stripBOM s = s readFile :: FilePath -> IO String -readFile = liftM (toString . stripBOM) . B.readFile +readFile = liftM (toString . stripBOM) . B.readFile . encodeString writeFile :: FilePath -> String -> IO () -writeFile f = B.writeFile f . fromString +writeFile f = B.writeFile (encodeString f) . fromString getContents :: IO String getContents = liftM (toString . stripBOM) B.getContents diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs new file mode 100644 index 000000000..082644eea --- /dev/null +++ b/src/Text/Pandoc/UUID.hs @@ -0,0 +1,77 @@ +{- +Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.UUID + Copyright : Copyright (C) 2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +UUID generation using Version 4 (random method) described +in RFC4122. See http://tools.ietf.org/html/rfc4122 +-} + +module Text.Pandoc.UUID ( UUID, getRandomUUID ) where + +import Text.Printf ( printf ) +import System.Random ( randomIO ) +import Data.Word +import Data.Bits ( setBit, clearBit ) +import Control.Monad ( liftM ) + +data UUID = UUID Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 + Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 + +instance Show UUID where + show (UUID a b c d e f g h i j k l m n o p) = + "urn:uuid:" ++ + printf "%02x" a ++ + printf "%02x" b ++ + printf "%02x" c ++ + printf "%02x" d ++ + "-" ++ + printf "%02x" e ++ + printf "%02x" f ++ + "-" ++ + printf "%02x" g ++ + printf "%02x" h ++ + "-" ++ + printf "%02x" i ++ + printf "%02x" j ++ + "-" ++ + printf "%02x" k ++ + printf "%02x" l ++ + printf "%02x" m ++ + printf "%02x" n ++ + printf "%02x" o ++ + printf "%02x" p + +getRandomUUID :: IO UUID +getRandomUUID = do + let getRN :: a -> IO Word8 + getRN _ = liftM fromIntegral (randomIO :: IO Int) + [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] <- mapM getRN ([1..16] :: [Int]) + -- set variant + let i' = i `setBit` 7 `clearBit` 6 + -- set version (0100 for random) + let g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4 + return $ UUID a b c d e f g' h i' j k l m n o p + diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 32948e292..a3a30f0a0 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -64,7 +64,7 @@ pandocToConTeXt options (Pandoc (Meta title authors date) blocks) = do then return "" else liftM render $ inlineListToConTeXt date body <- blockListToConTeXt blocks - let main = render body + let main = render $ body $$ text "" let context = writerVariables options ++ [ ("toc", if writerTableOfContents options then "yes" else "") , ("body", main) @@ -92,6 +92,8 @@ escapeCharForConTeXt ch = '#' -> "\\#" '<' -> "\\letterless{}" '>' -> "\\lettermore{}" + '[' -> "{[}" + ']' -> "{]}" '_' -> "\\letterunderscore{}" '\160' -> "~" x -> [x] @@ -153,6 +155,7 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do let style'' = case style' of DefaultStyle -> orderedListStyles !! level Decimal -> "[n]" + Example -> "[n]" LowerRoman -> "[r]" UpperRoman -> "[R]" LowerAlpha -> "[a]" diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 3abed1610..5223259eb 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -154,6 +154,7 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = let attribs = case numstyle of DefaultStyle -> [] Decimal -> [("numeration", "arabic")] + Example -> [("numeration", "arabic")] UpperAlpha -> [("numeration", "upperalpha")] LowerAlpha -> [("numeration", "loweralpha")] UpperRoman -> [("numeration", "upperroman")] diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs new file mode 100644 index 000000000..deaa2fe33 --- /dev/null +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -0,0 +1,283 @@ +{- +Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.EPUB + Copyright : Copyright (C) 2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to EPUB. +-} +module Text.Pandoc.Writers.EPUB ( writeEPUB ) where +import Data.IORef +import Data.Maybe ( fromMaybe, isNothing ) +import Data.List ( findIndices, isPrefixOf ) +import System.Environment ( getEnv ) +import System.FilePath ( (</>), takeBaseName, takeExtension ) +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Lazy.UTF8 ( fromString ) +import Codec.Archive.Zip +import System.Time +import Text.Pandoc.Shared hiding ( Element ) +import Text.Pandoc.Definition +import Control.Monad (liftM) +import Text.XML.Light hiding (ppTopElement) +import Text.Pandoc.UUID +import Text.Pandoc.Writers.HTML +import Text.Pandoc.Writers.Markdown ( writePlain ) +import Data.Char ( toLower ) + +-- | Produce an EPUB file from a Pandoc document. +writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line + -> WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> IO B.ByteString +writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do + (TOD epochtime _) <- getClockTime + let mkEntry path content = toEntry path epochtime content + let opts' = opts{ writerEmailObfuscation = NoObfuscation + , writerStandalone = True + , writerWrapText = False } + let sourceDir = writerSourceDirectory opts' + + -- title page + let vars = writerVariables opts' + let tpContent = fromString $ writeHtmlString + opts'{writerTemplate = pageTemplate + ,writerVariables = ("titlepage","yes"):vars} + (Pandoc meta []) + let tpEntry = mkEntry "title_page.xhtml" tpContent + + -- handle pictures + picsRef <- newIORef [] + Pandoc _ blocks <- liftM (processWith transformBlock) $ processWithM + (transformInlines (writerHTMLMathMethod opts) sourceDir picsRef) doc + pics <- readIORef picsRef + let readPicEntry (oldsrc, newsrc) = readEntry [] oldsrc >>= \e -> + return e{ eRelativePath = newsrc } + picEntries <- mapM readPicEntry pics + + -- body pages + let isH1 (Header 1 _) = True + isH1 _ = False + let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks + let chunks = splitByIndices h1Indices blocks + let titleize (Header 1 xs : ys) = Pandoc meta{docTitle = xs} ys + titleize xs = Pandoc meta xs + let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate + , writerHTMLMathMethod = PlainMath } + let chapters = map titleize chunks + let chapterToEntry :: Int -> Pandoc -> Entry + chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $ + fromString $ chapToHtml chap + let chapterEntries = zipWith chapterToEntry [1..] chapters + + -- contents.opf + lang <- catch (liftM (takeWhile (/='.')) $ getEnv "lang") + (\_ -> return "en-US") + uuid <- getRandomUUID + let chapterNode ent = unode "item" ! + [("id", takeBaseName $ eRelativePath ent), + ("href", eRelativePath ent), + ("media-type", "application/xhtml+xml")] $ () + let chapterRefNode ent = unode "itemref" ! + [("idref", takeBaseName $ eRelativePath ent)] $ () + let pictureNode ent = unode "item" ! + [("id", takeBaseName $ eRelativePath ent), + ("href", eRelativePath ent), + ("media-type", fromMaybe "application/octet-stream" + $ imageTypeOf $ eRelativePath ent)] $ () + let plainify t = removeTrailingSpace $ + writePlain opts'{ writerStandalone = False } $ + Pandoc meta [Plain t] + let plainTitle = plainify $ docTitle meta + let plainAuthors = map plainify $ docAuthors meta + let contentsData = fromString $ ppTopElement $ + unode "package" ! [("version","2.0") + ,("xmlns","http://www.idpf.org/2007/opf") + ,("unique-identifier","BookId")] $ + [ metadataElement (writerEPUBMetadata opts') + uuid lang plainTitle plainAuthors + , unode "manifest" $ + [ unode "item" ! [("id","ncx"), ("href","toc.ncx") + ,("media-type","application/x-dtbncx+xml")] $ () + , unode "item" ! [("id","style"), ("href","stylesheet.css") + ,("media-type","text/css")] $ () + ] ++ + map chapterNode (tpEntry : chapterEntries) ++ + map pictureNode picEntries + , unode "spine" ! [("toc","ncx")] $ + map chapterRefNode (tpEntry : chapterEntries) + ] + let contentsEntry = mkEntry "content.opf" contentsData + + -- toc.ncx + let navPointNode ent n tit = unode "navPoint" ! + [("id", "navPoint-" ++ show n) + ,("playOrder", show n)] $ + [ unode "navLabel" $ unode "text" tit + , unode "content" ! [("src", + eRelativePath ent)] $ () + ] + let tocData = fromString $ ppTopElement $ + unode "ncx" ! [("version","2005-1") + ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ + [ unode "head" + [ unode "meta" ! [("name","dtb:uid") + ,("content", show uuid)] $ () + , unode "meta" ! [("name","dtb:depth") + ,("content", "1")] $ () + , unode "meta" ! [("name","dtb:totalPageCount") + ,("content", "0")] $ () + , unode "meta" ! [("name","dtb:maxPageNumber") + ,("content", "0")] $ () + ] + , unode "docTitle" $ unode "text" $ plainTitle + , unode "navMap" $ zipWith3 navPointNode (tpEntry : chapterEntries) + [1..(length chapterEntries + 1)] + ("Title Page" : map (\(Pandoc m _) -> + plainify $ docTitle m) chapters) + ] + let tocEntry = mkEntry "toc.ncx" tocData + + -- mimetype + let mimetypeEntry = mkEntry "mimetype" $ fromString "application/epub+zip" + + -- container.xml + let containerData = fromString $ ppTopElement $ + unode "container" ! [("version","1.0") + ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ + unode "rootfiles" $ + unode "rootfile" ! [("full-path","content.opf") + ,("media-type","application/oebps-package+xml")] $ () + let containerEntry = mkEntry "META-INF/container.xml" containerData + + -- stylesheet + stylesheet <- case mbStylesheet of + Just s -> return s + Nothing -> readDataFile (writerUserDataDir opts) "epub.css" + let stylesheetEntry = mkEntry "stylesheet.css" $ fromString stylesheet + + -- construct archive + let archive = foldr addEntryToArchive emptyArchive + (mimetypeEntry : containerEntry : stylesheetEntry : tpEntry : + contentsEntry : tocEntry : (picEntries ++ chapterEntries) ) + return $ fromArchive archive + +metadataElement :: String -> UUID -> String -> String -> [String] -> Element +metadataElement metadataXML uuid lang title authors = + let userNodes = parseXML metadataXML + elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") + ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ + filter isDublinCoreElement $ onlyElems userNodes + dublinElements = ["contributor","coverage","creator","date", + "description","format","identifier","language","publisher", + "relation","rights","source","subject","title","type"] + isDublinCoreElement e = qPrefix (elName e) == Just "dc" && + qName (elName e) `elem` dublinElements + contains e n = not (null (findElements (QName n Nothing (Just "dc")) e)) + newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++ + [ unode "dc:language" lang | not (elt `contains` "language") ] ++ + [ unode "dc:identifier" ! [("id","BookId")] $ show uuid | + not (elt `contains` "identifier") ] ++ + [ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ] + in elt{ elContent = elContent elt ++ map Elem newNodes } + +transformInlines :: HTMLMathMethod + -> FilePath + -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images + -> [Inline] + -> IO [Inline] +transformInlines _ _ _ (Image lab (src,_) : xs) | isNothing (imageTypeOf src) = + return $ Emph lab : xs +transformInlines _ sourceDir picsRef (Image lab (src,tit) : xs) = do + pics <- readIORef picsRef + let oldsrc = sourceDir </> src + let ext = takeExtension src + newsrc <- case lookup oldsrc pics of + Just n -> return n + Nothing -> do + let new = "images/img" ++ show (length pics) ++ ext + modifyIORef picsRef ( (oldsrc, new): ) + return new + return $ Image lab (newsrc, tit) : xs +transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do + let writeHtmlInline opts z = removeTrailingSpace $ + writeHtmlString opts $ Pandoc (Meta [] [] []) [Plain [z]] + mathml = writeHtmlInline defaultWriterOptions{ + writerHTMLMathMethod = MathML Nothing } x + fallback = writeHtmlInline defaultWriterOptions{ + writerHTMLMathMethod = PlainMath } x + inOps = "<ops:switch xmlns:ops=\"http://www.idpf.org/2007/ops\">" ++ + "<ops:case required-namespace=\"http://www.w3.org/1998/Math/MathML\">" ++ + mathml ++ "</ops:case><ops:default>" ++ fallback ++ "</ops:default>" ++ + "</ops:switch>" + result = if "<math" `isPrefixOf` mathml then inOps else mathml + return $ HtmlInline result : xs +transformInlines _ _ _ (HtmlInline _ : xs) = return $ Str "" : xs +transformInlines _ _ _ (Link lab (_,_) : xs) = return $ lab ++ xs +transformInlines _ _ _ xs = return xs + +transformBlock :: Block -> Block +transformBlock (RawHtml _) = Null +transformBlock x = x + +(!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element +(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) + +-- | Version of 'ppTopElement' that specifies UTF-8 encoding. +ppTopElement :: Element -> String +ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . ppElement + +imageTypeOf :: FilePath -> Maybe String +imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of + "jpg" -> Just "image/jpeg" + "jpeg" -> Just "image/jpeg" + "jfif" -> Just "image/jpeg" + "png" -> Just "image/png" + "gif" -> Just "image/gif" + "svg" -> Just "image/svg+xml" + _ -> Nothing + +pageTemplate :: String +pageTemplate = unlines + [ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + , "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" + , "<html xmlns=\"http://www.w3.org/1999/xhtml\">" + , "<head>" + , "<title>$title$</title>" + , "<link href=\"stylesheet.css\" type=\"text/css\" rel=\"stylesheet\" />" + , "</head>" + , "<body>" + , "$if(titlepage)$" + , "<h1 class=\"title\">$title$</h1>" + , "$for(author)$" + , "<h2 class=\"author\">$author$</h2>" + , "$endfor$" + , "$else$" + , "<h1>$title$</h1>" + , "$body$" + , "$endif$" + , "</body>" + , "</html>" + ] + diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 299471328..b8da4bec0 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -34,8 +34,9 @@ import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.Pandoc.Shared import Text.Pandoc.Templates import Text.Pandoc.Readers.TeXMath -import Text.Pandoc.Highlighting ( highlightHtml ) +import Text.Pandoc.Highlighting ( highlightHtml, defaultHighlightingCss ) import Text.Pandoc.XML (stripTags, escapeStringForXML) +import Network.HTTP ( urlEncode ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) import Data.List ( isPrefixOf, intersperse ) @@ -104,7 +105,24 @@ pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do toc <- if writerTableOfContents opts then tableOfContents opts sects else return Nothing - blocks' <- liftM toHtmlFromList $ mapM (elementToHtml opts) sects + let startSlide = RawHtml "<div class=\"slide\">\n" + endSlide = RawHtml "</div>\n" + let cutUp (HorizontalRule : Header 1 ys : xs) = cutUp (Header 1 ys : xs) + cutUp (HorizontalRule : xs) = [endSlide, startSlide] ++ cutUp xs + cutUp (Header 1 ys : xs) = [endSlide, startSlide] ++ + (Header 1 ys : cutUp xs) + cutUp (x:xs) = x : cutUp xs + cutUp [] = [] + let slides = case blocks of + (HorizontalRule : xs) -> [startSlide] ++ cutUp xs ++ [endSlide] + (Header 1 ys : xs) -> [startSlide, Header 1 ys] ++ + cutUp xs ++ [endSlide] + _ -> [startSlide] ++ cutUp blocks ++ + [endSlide] + blocks' <- liftM toHtmlFromList $ + if writerSlideVariant opts `elem` [SlidySlides, S5Slides] + then mapM (blockToHtml opts) slides + else mapM (elementToHtml opts) sects st <- get let notes = reverse (stNotes st) let thebody = blocks' +++ footnoteSection notes @@ -116,6 +134,8 @@ pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do MathML (Just url) -> script ! [src url, thetype "text/javascript"] $ noHtml + MathJax url -> + script ! [src url, thetype "text/javascript"] $ noHtml JsMath (Just url) -> script ! [src url, thetype "text/javascript"] $ noHtml @@ -125,7 +145,8 @@ pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do primHtml s Nothing -> noHtml else noHtml - let newvars = [("highlighting","yes") | stHighlighting st] ++ + let newvars = [("highlighting-css", defaultHighlightingCss) | + stHighlighting st] ++ [("math", renderHtmlFragment math) | stMath st] return (tit, auths, date, toc, thebody, newvars) @@ -197,10 +218,16 @@ elementToHtml opts (Sec level num id' title' elements) = do innerContents <- mapM (elementToHtml opts) elements modify $ \st -> st{stSecNum = num} -- update section number header' <- blockToHtml opts (Header level title') - return $ if writerS5 opts || (writerStrictMarkdown opts && not (writerTableOfContents opts)) - -- S5 gets confused by the extra divs around sections - then toHtmlFromList (header' : innerContents) - else thediv ! [prefixedId opts id'] << (header' : innerContents) + let slides = writerSlideVariant opts `elem` [SlidySlides, S5Slides] + let header'' = header' ! [prefixedId opts id' | + not (writerStrictMarkdown opts || + writerSectionDivs opts || slides)] + let stuff = header'' : innerContents + return $ if slides -- S5 gets confused by the extra divs around sections + then toHtmlFromList stuff + else if writerSectionDivs opts + then thediv ! [prefixedId opts id'] << stuff + else toHtmlFromList stuff -- | Convert list of Note blocks to a footnote <div>. -- Assumes notes are sorted. @@ -285,15 +312,18 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do attrs = [theclass (unwords classes') | not (null classes')] ++ [prefixedId opts id' | not (null id')] ++ map (\(x,y) -> strAttr x y) keyvals + addBird = if "literate" `elem` classes' + then unlines . map ("> " ++) . lines + else unlines . lines in return $ pre ! attrs $ thecode << (replicate (length leadingBreaks) br +++ - [stringToHtml $ rawCode' ++ "\n"]) + [stringToHtml $ addBird rawCode']) Right h -> modify (\st -> st{ stHighlighting = True }) >> return h blockToHtml opts (BlockQuote blocks) = -- in S5, treat list in blockquote specially -- if default is incremental, make it nonincremental; -- otherwise incremental - if writerS5 opts + if writerSlideVariant opts /= NoSlides then let inc = not (writerIncremental opts) in case blocks of [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) @@ -436,26 +466,31 @@ inlineToHtml opts inline = stringToHtml "”") in do contents <- inlineListToHtml opts lst return $ leftQuote +++ contents +++ rightQuote - (Math t str) -> - modify (\st -> st {stMath = True}) >> + (Math t str) -> modify (\st -> st {stMath = True}) >> (case writerHTMLMathMethod opts of LaTeXMathML _ -> -- putting LaTeXMathML in container with class "LaTeX" prevents -- non-math elements on the page from being treated as math by -- the javascript return $ thespan ! [theclass "LaTeX"] $ - if t == InlineMath - then primHtml ("$" ++ str ++ "$") - else primHtml ("$$" ++ str ++ "$$") - JsMath _ -> - return $ if t == InlineMath - then thespan ! [theclass "math"] $ primHtml str - else thediv ! [theclass "math"] $ primHtml str - MimeTeX url -> - return $ image ! [src (url ++ "?" ++ str), - alt str, title str] + case t of + InlineMath -> primHtml ("$" ++ str ++ "$") + DisplayMath -> primHtml ("$$" ++ str ++ "$$") + JsMath _ -> do + let m = primHtml str + return $ case t of + InlineMath -> thespan ! [theclass "math"] $ m + DisplayMath -> thediv ! [theclass "math"] $ m + WebTeX url -> do + let m = image ! [src (url ++ urlEncode str), + alt str, title str] + return $ case t of + InlineMath -> m + DisplayMath -> br +++ m +++ br GladTeX -> - return $ primHtml $ "<EQ>" ++ str ++ "</EQ>" + return $ case t of + InlineMath -> primHtml $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>" + DisplayMath -> primHtml $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>" MathML _ -> do let dt = if t == InlineMath then DisplayInline @@ -466,12 +501,18 @@ inlineToHtml opts inline = Right r -> return $ primHtml $ ppcElement conf r Left _ -> inlineListToHtml opts - (readTeXMath str) >>= - return . (thespan ! - [theclass "math"]) - PlainMath -> - inlineListToHtml opts (readTeXMath str) >>= - return . (thespan ! [theclass "math"]) ) + (readTeXMath str) >>= return . + (thespan ! [theclass "math"]) + MathJax _ -> return $ primHtml $ + case t of + InlineMath -> "\\(" ++ str ++ "\\)" + DisplayMath -> "\\[" ++ str ++ "\\]" + PlainMath -> do + x <- inlineListToHtml opts (readTeXMath str) + let m = thespan ! [theclass "math"] $ x + return $ case t of + InlineMath -> m + DisplayMath -> br +++ m +++ br ) (TeX str) -> case writerHTMLMathMethod opts of LaTeXMathML _ -> do modify (\st -> st {stMath = True}) return $ primHtml str diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 720c00ac8..5fa345760 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -107,6 +107,8 @@ stringToLaTeX = escapeStringUsing latexEscapes , ('|', "\\textbar{}") , ('<', "\\textless{}") , ('>', "\\textgreater{}") + , ('[', "{[}") -- to avoid interpretation as + , (']', "{]}") -- optional arguments , ('\160', "~") ] diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index c74cd81f9..a46a18893 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -63,7 +63,7 @@ pandocToMan opts (Pandoc (Meta title authors date) blocks) = do body <- blockListToMan opts blocks notes <- liftM stNotes get notes' <- notesToMan opts (reverse notes) - let main = render $ body $$ notes' + let main = render $ body $$ notes' $$ text "" hasTables <- liftM stHasTables get let context = writerVariables opts ++ [ ("body", main) @@ -151,8 +151,12 @@ blockToMan opts (Header level inlines) = do _ -> ".SS " return $ text heading <> contents blockToMan _ (CodeBlock _ str) = return $ - text ".PP" $$ text "\\f[CR]" $$ - text ((unlines . map (" " ++) . lines) (escapeCode str)) <> text "\\f[]" + text ".IP" $$ + text ".nf" $$ + text "\\f[C]" $$ + text (escapeCode str) $$ + text "\\f[]" $$ + text ".fi" blockToMan opts (BlockQuote blocks) = do contents <- blockListToMan opts blocks return $ text ".RS" $$ contents $$ text ".RE" @@ -300,7 +304,7 @@ inlineToMan _ EnDash = return $ text "\\[en]" inlineToMan _ Apostrophe = return $ char '\'' inlineToMan _ Ellipses = return $ text "\\&..." inlineToMan _ (Code str) = - return $ text $ "\\f[B]" ++ escapeCode str ++ "\\f[]" + return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]" inlineToMan _ (Str str) = return $ text $ escapeString str inlineToMan opts (Math InlineMath str) = inlineListToMan opts $ readTeXMath str inlineToMan opts (Math DisplayMath str) = do diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index d6876d239..1b612006b 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -32,9 +32,10 @@ Markdown: <http://daringfireball.net/projects/markdown/> module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where import Text.Pandoc.Definition import Text.Pandoc.Templates (renderTemplate) -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Parsing import Text.Pandoc.Blocks -import Text.ParserCombinators.Parsec ( parse, GenParser ) +import Text.ParserCombinators.Parsec ( runParser, GenParser ) import Data.List ( group, isPrefixOf, find, intersperse, transpose ) import Text.PrettyPrint.HughesPJ hiding ( Str ) import Control.Monad.State @@ -95,7 +96,7 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do notes' <- notesToMarkdown opts (reverse $ stNotes st) st' <- get -- note that the notes may contain refs refs' <- refsToMarkdown opts (reverse $ stRefs st') - let main = render $ body $+$ text "" $+$ notes' $+$ text "" $+$ refs' + let main = render $ foldl ($+$) empty $ [body, notes', refs'] let context = writerVariables opts ++ [ ("toc", render toc) , ("body", main) @@ -158,7 +159,7 @@ elementToListItem (Sec _ _ _ headerText subsecs) = [Plain headerText] ++ else [BulletList $ map elementToListItem subsecs] -- | Ordered list start parser for use in Para below. -olMarker :: GenParser Char st Char +olMarker :: GenParser Char ParserState Char olMarker = do (start, style', delim) <- anyOrderedListMarker if delim == Period && (style' == UpperAlpha || (style' == UpperRoman && @@ -169,7 +170,7 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker -- | True if string begins with an ordered list marker beginsWithOrderedListMarker :: String -> Bool beginsWithOrderedListMarker str = - case parse olMarker "para start" str of + case runParser olMarker defaultParserState "para start" str of Left _ -> False Right _ -> True @@ -238,7 +239,7 @@ blockToMarkdown opts (Table caption aligns widths headers rows) = do caption' <- inlineListToMarkdown opts caption let caption'' = if null caption then empty - else text "" $+$ (text "Table: " <> caption') + else text "" $+$ (text ": " <> caption') headers' <- mapM (blockListToMarkdown opts) headers let alignHeader alignment = case alignment of AlignLeft -> leftAlignBlock @@ -372,14 +373,14 @@ inlineToMarkdown opts (Subscript lst) = do inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst - return $ char '\'' <> contents <> char '\'' + return $ char '‘' <> contents <> char '’' inlineToMarkdown opts (Quoted DoubleQuote lst) = do contents <- inlineListToMarkdown opts lst - return $ char '"' <> contents <> char '"' -inlineToMarkdown _ EmDash = return $ text "--" -inlineToMarkdown _ EnDash = return $ char '-' -inlineToMarkdown _ Apostrophe = return $ char '\'' -inlineToMarkdown _ Ellipses = return $ text "..." + return $ char '“' <> contents <> char '”' +inlineToMarkdown _ EmDash = return $ char '\8212' +inlineToMarkdown _ EnDash = return $ char '\8211' +inlineToMarkdown _ Apostrophe = return $ char '\8217' +inlineToMarkdown _ Ellipses = return $ char '\8230' inlineToMarkdown _ (Code str) = let tickGroups = filter (\s -> '`' `elem` s) $ group str longest = if null tickGroups diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs new file mode 100644 index 000000000..3b5ea7481 --- /dev/null +++ b/src/Text/Pandoc/Writers/Native.hs @@ -0,0 +1,86 @@ +{- +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Native + Copyright : Copyright (C) 2006-2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Utility functions and definitions used by the various Pandoc modules. +-} +module Text.Pandoc.Writers.Native ( writeNative ) +where +import Text.Pandoc.Shared ( WriterOptions ) +import Data.List ( intercalate ) +import Text.Pandoc.Definition + +-- | Indent string as a block. +indentBy :: Int -- ^ Number of spaces to indent the block + -> Int -- ^ Number of spaces (rel to block) to indent first line + -> String -- ^ Contents of block to indent + -> String +indentBy _ _ [] = "" +indentBy num first str = + let (firstLine:restLines) = lines str + firstLineIndent = num + first + in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ + (intercalate "\n" $ map ((replicate num ' ') ++ ) restLines) + +-- | Prettyprint list of Pandoc blocks elements. +prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks + -> [Block] -- ^ List of blocks + -> String +prettyBlockList indent [] = indentBy indent 0 "[]" +prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ + (intercalate "\n, " (map prettyBlock blocks)) ++ " ]" + +-- | Prettyprint Pandoc block element. +prettyBlock :: Block -> String +prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ + (prettyBlockList 2 blocks) +prettyBlock (OrderedList attribs blockLists) = + "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++ + (intercalate ", " $ map (\blocks -> prettyBlockList 2 blocks) + blockLists)) ++ " ]" +prettyBlock (BulletList blockLists) = "BulletList\n" ++ + indentBy 2 0 ("[ " ++ (intercalate ", " + (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" +prettyBlock (DefinitionList items) = "DefinitionList\n" ++ + indentBy 2 0 ("[ " ++ (intercalate "\n, " + (map (\(term, defs) -> "(" ++ show term ++ ",\n" ++ + indentBy 3 0 ("[ " ++ (intercalate ", " + (map (\blocks -> prettyBlockList 2 blocks) defs)) ++ "]") ++ + ")") items))) ++ " ]" +prettyBlock (Table caption aligns widths header rows) = + "Table " ++ show caption ++ " " ++ show aligns ++ " " ++ + show widths ++ "\n" ++ prettyRow header ++ " [\n" ++ + (intercalate ",\n" (map prettyRow rows)) ++ " ]" + where prettyRow cols = indentBy 2 0 ("[ " ++ (intercalate ", " + (map (\blocks -> prettyBlockList 2 blocks) + cols))) ++ " ]" +prettyBlock block = show block + +-- | Prettyprint Pandoc document. +writeNative :: WriterOptions -> Pandoc -> String +writeNative _ (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++ + ")\n" ++ (prettyBlockList 0 blocks) ++ "\n" + diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs new file mode 100644 index 000000000..5aa0fd310 --- /dev/null +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -0,0 +1,83 @@ +{- +Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.ODT + Copyright : Copyright (C) 2008-2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to ODT. +-} +module Text.Pandoc.Writers.ODT ( writeODT ) where +import Data.IORef +import System.FilePath ( (</>), takeExtension ) +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Lazy.UTF8 ( fromString ) +import Codec.Archive.Zip +import System.Time +import Paths_pandoc ( getDataFileName ) +import Text.Pandoc.Shared ( WriterOptions(..) ) +import Text.Pandoc.Definition +import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) +import System.Directory +import Control.Monad (liftM) + +-- | Produce an ODT file from a Pandoc document. +writeODT :: Maybe FilePath -- ^ Path specified by --reference-odt + -> WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> IO B.ByteString +writeODT mbRefOdt opts doc = do + let datadir = writerUserDataDir opts + refArchive <- liftM toArchive $ + case mbRefOdt of + Just f -> B.readFile f + Nothing -> do + let defaultODT = getDataFileName "reference.odt" >>= B.readFile + case datadir of + Nothing -> defaultODT + Just d -> do + exists <- doesFileExist (d </> "reference.odt") + if exists + then B.readFile (d </> "reference.odt") + else defaultODT + -- handle pictures + picEntriesRef <- newIORef ([] :: [Entry]) + let sourceDir = writerSourceDirectory opts + doc' <- processWithM (transformPic sourceDir picEntriesRef) doc + let newContents = writeOpenDocument opts doc' + (TOD epochtime _) <- getClockTime + let contentEntry = toEntry "content.xml" epochtime $ fromString newContents + picEntries <- readIORef picEntriesRef + let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries) + return $ fromArchive archive + +transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline +transformPic sourceDir entriesRef (Image lab (src,tit)) = do + entries <- readIORef entriesRef + let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src + catch (readEntry [] (sourceDir </> src) >>= \entry -> + modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >> + return (Image lab (newsrc, tit))) + (\_ -> return (Emph lab)) +transformPic _ _ x = return x + diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 680ec7749..e79f97b33 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -70,7 +70,7 @@ pandocToRST (Pandoc (Meta tit auth dat) blocks) = do refs <- liftM (reverse . stLinks) get >>= refsToRST pics <- liftM (reverse . stImages) get >>= pictRefsToRST hasMath <- liftM stHasMath get - let main = render $ body $+$ notes $+$ text "" $+$ refs $+$ pics + let main = render $ foldl ($+$) empty $ [body, notes, refs, pics] let context = writerVariables opts ++ [ ("body", main) , ("title", render title) @@ -282,16 +282,16 @@ inlineToRST (Subscript lst) = do inlineToRST (SmallCaps lst) = inlineListToRST lst inlineToRST (Quoted SingleQuote lst) = do contents <- inlineListToRST lst - return $ char '\'' <> contents <> char '\'' + return $ char '‘' <> contents <> char '’' inlineToRST (Quoted DoubleQuote lst) = do contents <- inlineListToRST lst - return $ char '"' <> contents <> char '"' + return $ char '“' <> contents <> char '”' inlineToRST (Cite _ lst) = inlineListToRST lst -inlineToRST EmDash = return $ text "--" -inlineToRST EnDash = return $ char '-' -inlineToRST Apostrophe = return $ char '\'' -inlineToRST Ellipses = return $ text "..." +inlineToRST EmDash = return $ char '\8212' +inlineToRST EnDash = return $ char '\8211' +inlineToRST Apostrophe = return $ char '\8217' +inlineToRST Ellipses = return $ char '\8230' inlineToRST (Code str) = return $ text $ "``" ++ str ++ "``" inlineToRST (Str str) = return $ text $ escapeString str inlineToRST (Math t str) = do diff --git a/src/Text/Pandoc/Writers/S5.hs b/src/Text/Pandoc/Writers/S5.hs deleted file mode 100644 index 1a2639a50..000000000 --- a/src/Text/Pandoc/Writers/S5.hs +++ /dev/null @@ -1,136 +0,0 @@ -{- -Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Writers.S5 - Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Definitions for creation of S5 powerpoint-like HTML. -(See <http://meyerweb.com/eric/tools/s5/>.) --} -module Text.Pandoc.Writers.S5 ( - -- * Header includes - s5HeaderIncludes, - s5Links, - -- * Functions - writeS5, - writeS5String, - insertS5Structure - ) where -import Text.Pandoc.Shared ( WriterOptions, readDataFile ) -import Text.Pandoc.Writers.HTML ( writeHtml, writeHtmlString ) -import Text.Pandoc.Definition -import Text.XHtml.Strict -import System.FilePath ( (</>) ) -import Data.List ( intercalate ) - -s5HeaderIncludes :: Maybe FilePath -> IO String -s5HeaderIncludes datadir = do - c <- s5CSS datadir - j <- s5Javascript datadir - return $ s5Meta ++ c ++ j - -s5Meta :: String -s5Meta = "<!-- configuration parameters -->\n<meta name=\"defaultView\" content=\"slideshow\" />\n<meta name=\"controlVis\" content=\"hidden\" />\n" - -s5Javascript :: Maybe FilePath -> IO String -s5Javascript datadir = do - jsCom <- readDataFile datadir $ "s5" </> "default" </> "slides.js.comment" - jsPacked <- readDataFile datadir $ "s5" </> "default" </> "slides.js.packed" - return $ "<script type=\"text/javascript\">\n" ++ jsCom ++ jsPacked ++ - "</script>\n" - -s5CSS :: Maybe FilePath -> IO String -s5CSS datadir = do - s5CoreCSS <- readDataFile datadir $ "s5" </> "default" </> "s5-core.css" - s5FramingCSS <- readDataFile datadir $ "s5" </> "default" </> "framing.css" - s5PrettyCSS <- readDataFile datadir $ "s5" </> "default" </> "pretty.css" - s5OperaCSS <- readDataFile datadir $ "s5" </> "default" </> "opera.css" - s5OutlineCSS <- readDataFile datadir $ "s5" </> "default" </> "outline.css" - s5PrintCSS <- readDataFile datadir $ "s5" </> "default" </> "print.css" - return $ "<style type=\"text/css\" media=\"projection\" id=\"slideProj\">\n" ++ s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS ++ "\n</style>\n<style type=\"text/css\" media=\"projection\" id=\"operaFix\">\n" ++ s5OperaCSS ++ "\n</style>\n<style type=\"text/css\" media=\"screen\" id=\"outlineStyle\">\n" ++ s5OutlineCSS ++ "\n</style>\n<style type=\"text/css\" media=\"print\" id=\"slidePrint\">\n" ++ s5PrintCSS ++ "\n</style>\n" - -s5Links :: String -s5Links = "<!-- style sheet links -->\n<link rel=\"stylesheet\" href=\"ui/default/slides.css\" type=\"text/css\" media=\"projection\" id=\"slideProj\" />\n<link rel=\"stylesheet\" href=\"ui/default/outline.css\" type=\"text/css\" media=\"screen\" id=\"outlineStyle\" />\n<link rel=\"stylesheet\" href=\"ui/default/print.css\" type=\"text/css\" media=\"print\" id=\"slidePrint\" />\n<link rel=\"stylesheet\" href=\"ui/default/opera.css\" type=\"text/css\" media=\"projection\" id=\"operaFix\" />\n<!-- S5 JS -->\n<script src=\"ui/default/slides.js\" type=\"text/javascript\"></script>\n" - --- | Converts Pandoc document to an S5 HTML presentation (Html structure). -writeS5 :: WriterOptions -> Pandoc -> Html -writeS5 options = (writeHtml options) . insertS5Structure - --- | Converts Pandoc document to an S5 HTML presentation (string). -writeS5String :: WriterOptions -> Pandoc -> String -writeS5String options = (writeHtmlString options) . insertS5Structure - --- | Inserts HTML needed for an S5 presentation (e.g. around slides). -layoutDiv :: [Inline] -- ^ Title of document (for header or footer) - -> [Inline] -- ^ Date of document (for header or footer) - -> [Block] -- ^ List of block elements returned -layoutDiv title' date = [(RawHtml "<div class=\"layout\">\n<div id=\"controls\"></div>\n<div id=\"currentSlide\"></div>\n<div id=\"header\"></div>\n<div id=\"footer\">\n"), (Header 1 date), (Header 2 title'), (RawHtml "</div>\n</div>\n")] - -presentationStart :: Block -presentationStart = RawHtml "<div class=\"presentation\">\n\n" - -presentationEnd :: Block -presentationEnd = RawHtml "</div>\n" - -slideStart :: Block -slideStart = RawHtml "<div class=\"slide\">\n" - -slideEnd :: Block -slideEnd = RawHtml "</div>\n" - --- | Returns 'True' if block is a Header 1. -isH1 :: Block -> Bool -isH1 (Header 1 _) = True -isH1 _ = False - --- | Insert HTML around sections to make individual slides. -insertSlides :: Bool -> [Block] -> [Block] -insertSlides beginning blocks = - let (beforeHead, rest) = break isH1 blocks in - if (null rest) then - if beginning then - beforeHead - else - beforeHead ++ [slideEnd] - else - if beginning then - beforeHead ++ - slideStart:(head rest):(insertSlides False (tail rest)) - else - beforeHead ++ - slideEnd:slideStart:(head rest):(insertSlides False (tail rest)) - --- | Insert blocks into 'Pandoc' for slide structure. -insertS5Structure :: Pandoc -> Pandoc -insertS5Structure (Pandoc meta' []) = Pandoc meta' [] -insertS5Structure (Pandoc (Meta title' authors date) blocks) = - let slides = insertSlides True blocks - firstSlide = if not (null title') - then [slideStart, (Header 1 title'), - (Header 3 (intercalate [LineBreak] authors)), - (Header 4 date), slideEnd] - else [] - newBlocks = (layoutDiv title' date) ++ presentationStart:firstSlide ++ - slides ++ [presentationEnd] - in Pandoc (Meta title' authors date) newBlocks diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 503222754..65e053827 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -144,6 +144,7 @@ blockToTexinfo (OrderedList (start, numstyle, _) lst) = do exemplar = case numstyle of DefaultStyle -> decimal Decimal -> decimal + Example -> decimal UpperRoman -> decimal -- Roman numerals not supported LowerRoman -> decimal UpperAlpha -> upperAlpha diff --git a/src/markdown2pdf.hs b/src/markdown2pdf.hs index c47bcf3c0..cc6a034c0 100644 --- a/src/markdown2pdf.hs +++ b/src/markdown2pdf.hs @@ -154,7 +154,8 @@ main = bracket "--number-sections","--include-in-header", "--include-before-body","--include-after-body", "--custom-header","--output", - "--template", "--variable"] + "--template", "--variable", + "--csl", "--biblio", "--biblio-format"] let isOpt ('-':_) = True isOpt _ = False let opts = filter isOpt args diff --git a/src/pandoc.hs b/src/pandoc.hs index 92fc3db1f..4f5a1c32a 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -30,9 +30,9 @@ writers. -} module Main where import Text.Pandoc -import Text.Pandoc.ODT -import Text.Pandoc.Writers.S5 (s5HeaderIncludes) -import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile ) +import Text.Pandoc.S5 (s5HeaderIncludes) +import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile, + headerShift ) #ifdef _HIGHLIGHTING import Text.Pandoc.Highlighting ( languages ) #endif @@ -40,7 +40,6 @@ import System.Environment ( getArgs, getProgName, getEnvironment ) import System.Exit ( exitWith, ExitCode (..) ) import System.FilePath import System.Console.GetOpt -import Data.Maybe ( fromMaybe ) import Data.Char ( toLower, isDigit ) import Data.List ( intercalate, isSuffixOf ) import System.Directory ( getAppUserDataDirectory ) @@ -52,9 +51,10 @@ import Text.Pandoc.Biblio #endif import Control.Monad (when, unless, liftM) import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) -import Network.URI (parseURI, isURI) -import Data.ByteString.Lazy.UTF8 (toString) -import Codec.Binary.UTF8.String (decodeString) +import Network.URI (parseURI, isURI, URI(..)) +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Lazy.UTF8 (toString, fromString) +import Codec.Binary.UTF8.String (decodeString, encodeString) copyrightMessage :: String copyrightMessage = "\nCopyright (C) 2006-2010 John MacFarlane\n" ++ @@ -102,13 +102,15 @@ readPandoc _ = read -- | Association list of formats and writers. writers :: [ ( String, WriterOptions -> Pandoc -> String ) ] -writers = [("native" , writeDoc) +writers = [("native" , writeNative) ,("html" , writeHtmlString) ,("html+lhs" , writeHtmlString) - ,("s5" , writeS5String) + ,("s5" , writeHtmlString) + ,("slidy" , writeHtmlString) ,("docbook" , writeDocbook) ,("opendocument" , writeOpenDocument) - ,("odt" , writeOpenDocument) + ,("odt" , \_ _ -> "") + ,("epub" , \_ _ -> "") ,("latex" , writeLaTeX) ,("latex+lhs" , writeLaTeX) ,("context" , writeConTeXt) @@ -125,17 +127,7 @@ writers = [("native" , writeDoc) ] isNonTextOutput :: String -> Bool -isNonTextOutput = (`elem` ["odt"]) - --- | Writer for Pandoc native format. -writeDoc :: WriterOptions -> Pandoc -> String -writeDoc _ = prettyPandoc - -headerShift :: Int -> Pandoc -> Pandoc -headerShift n = processWith shift - where shift :: Block -> Block - shift (Header level inner) = Header (level + n) inner - shift x = x +isNonTextOutput = (`elem` ["odt","epub"]) -- | Data structure for command line options. data Opt = Opt @@ -149,15 +141,17 @@ data Opt = Opt , optTransforms :: [Pandoc -> Pandoc] -- ^ Doc transforms to apply , optTemplate :: String -- ^ Custom template , optVariables :: [(String,String)] -- ^ Template variables to set - , optBefore :: [String] -- ^ Texts to include before body - , optAfter :: [String] -- ^ Texts to include after body , optOutputFile :: String -- ^ Name of output file , optNumberSections :: Bool -- ^ Number sections in LaTeX - , optIncremental :: Bool -- ^ Use incremental lists in S5 + , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML + , optIncremental :: Bool -- ^ Use incremental lists in Slidy/S5 + , optOffline :: Bool -- ^ Make slideshow accessible offline , optXeTeX :: Bool -- ^ Format latex for xetex , optSmart :: Bool -- ^ Use smart typography , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math , optReferenceODT :: Maybe FilePath -- ^ Path of reference.odt + , optEPUBStylesheet :: Maybe String -- ^ EPUB stylesheet + , optEPUBMetadata :: String -- ^ EPUB metadata , optDumpArgs :: Bool -- ^ Output command-line arguments , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments , optStrict :: Bool -- ^ Use strict markdown syntax @@ -189,15 +183,17 @@ defaultOpts = Opt , optTransforms = [] , optTemplate = "" , optVariables = [] - , optBefore = [] - , optAfter = [] , optOutputFile = "-" -- "-" means stdout , optNumberSections = False + , optSectionDivs = False , optIncremental = False + , optOffline = False , optXeTeX = False , optSmart = False , optHTMLMathMethod = PlainMath , optReferenceODT = Nothing + , optEPUBStylesheet = Nothing + , optEPUBMetadata = "" , optDumpArgs = False , optIgnoreArgs = False , optStrict = False @@ -290,17 +286,36 @@ options = , Option "" ["mimetex"] (OptArg - (\arg opt -> return opt { optHTMLMathMethod = MimeTeX - (fromMaybe "/cgi-bin/mimetex.cgi" arg)}) + (\arg opt -> do + let url' = case arg of + Just u -> u ++ "?" + Nothing -> "/cgi-bin/mimetex.cgi?" + return opt { optHTMLMathMethod = WebTeX url' }) "URL") "" -- "Use mimetex for HTML math" + , Option "" ["webtex"] + (OptArg + (\arg opt -> do + let url' = case arg of + Just u -> u + Nothing -> "http://chart.apis.google.com/chart?cht=tx&chl=" + return opt { optHTMLMathMethod = WebTeX url' }) + "URL") + "" -- "Use web service for HTML math" + , Option "" ["jsmath"] (OptArg (\arg opt -> return opt { optHTMLMathMethod = JsMath arg}) "URL") "" -- "Use jsMath for HTML math" + , Option "" ["mathjax"] + (ReqArg + (\arg opt -> return opt { optHTMLMathMethod = MathJax arg}) + "URL") + "" -- "Use MathJax for HTML math" + , Option "" ["gladtex"] (NoArg (\opt -> return opt { optHTMLMathMethod = GladTeX })) @@ -309,7 +324,13 @@ options = , Option "i" ["incremental"] (NoArg (\opt -> return opt { optIncremental = True })) - "" -- "Make list items display incrementally in S5" + "" -- "Make list items display incrementally in Slidy/S5" + + , Option "" ["offline"] + (NoArg + (\opt -> return opt { optOffline = True, + optStandalone = True })) + "" -- "Make slide shows include all the needed js and css" , Option "" ["xetex"] (NoArg @@ -321,6 +342,11 @@ options = (\opt -> return opt { optNumberSections = True })) "" -- "Number sections in LaTeX" + , Option "" ["section-divs"] + (NoArg + (\opt -> return opt { optSectionDivs = True })) + "" -- "Put sections in div tags in HTML" + , Option "" ["no-wrap"] (NoArg (\opt -> return opt { optWrapText = False })) @@ -396,7 +422,7 @@ options = _ -> do UTF8.hPutStrLn stderr $ "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)" exitWith $ ExitFailure 17) - "FILENAME") + "KEY:VALUE") "" -- "Use custom template" , Option "c" ["css"] @@ -468,6 +494,22 @@ options = "FILENAME") "" -- "Path of custom reference.odt" + , Option "" ["epub-stylesheet"] + (ReqArg + (\arg opt -> do + text <- UTF8.readFile arg + return opt { optEPUBStylesheet = Just text }) + "FILENAME") + "" -- "Path of epub.css" + + , Option "" ["epub-metadata"] + (ReqArg + (\arg opt -> do + text <- UTF8.readFile arg + return opt { optEPUBMetadata = text }) + "FILENAME") + "" -- "Path of epub metadata file" + , Option "D" ["print-default-template"] (ReqArg (\arg _ -> do @@ -581,6 +623,7 @@ defaultWriterName x = ".texinfo" -> "texinfo" ".db" -> "docbook" ".odt" -> "odt" + ".epub" -> "epub" ['.',y] | y `elem` ['1'..'9'] -> "man" _ -> "html" @@ -617,18 +660,20 @@ main = do , optWriter = writerName , optParseRaw = parseRaw , optVariables = variables - , optBefore = befores - , optAfter = afters , optTableOfContents = toc , optTransforms = transforms , optTemplate = template , optOutputFile = outputFile , optNumberSections = numberSections + , optSectionDivs = sectionDivs , optIncremental = incremental + , optOffline = offline , optXeTeX = xetex , optSmart = smart , optHTMLMathMethod = mathMethod , optReferenceODT = referenceODT + , optEPUBStylesheet = epubStylesheet + , optEPUBMetadata = epubMetadata , optDumpArgs = dumpArgs , optIgnoreArgs = ignoreArgs , optStrict = strict @@ -682,9 +727,13 @@ main = do Just r -> return r Nothing -> error ("Unknown reader: " ++ readerName') - writer <- case (lookup writerName' writers) of - Just r -> return r - Nothing -> error ("Unknown writer: " ++ writerName') + let writer = case lookup writerName' writers of + Just _ | writerName' == "epub" -> writeEPUB epubStylesheet + Just _ | writerName' == "odt" -> writeODT referenceODT + Just r -> \o -> + return . fromString . r o + Nothing -> error $ "Unknown writer: " ++ + writerName' templ <- getDefaultTemplate datadir writerName' let defaultTemplate = case templ of @@ -702,11 +751,18 @@ main = do refs <- if null biblioFile then return [] else readBiblioFile biblioFile biblioFormat #endif - variables' <- if writerName' == "s5" && standalone' - then do - inc <- s5HeaderIncludes datadir - return $ ("header-includes", inc) : variables - else return variables + variables' <- case (writerName', standalone', offline) of + ("s5", True, True) -> do + inc <- s5HeaderIncludes datadir + return $ ("s5includes", inc) : variables + ("slidy", True, True) -> do + slidyJs <- readDataFile datadir $ + "slidy" </> "slidy.min.js" + slidyCss <- readDataFile datadir $ + "slidy" </> "slidy.min.css" + return $ ("slidy-js", slidyJs) : + ("slidy-css", slidyCss) : variables + _ -> return variables variables'' <- case mathMethod of LaTeXMathML Nothing -> do @@ -717,6 +773,15 @@ main = do return $ ("mathml-script", s) : variables' _ -> return variables' + let sourceDir = if null sources + then "." + else takeDirectory (head sources) + + let slideVariant = case writerName' of + "s5" -> S5Slides + "slidy" -> SlidySlides + _ -> NoSlides + let startParserState = defaultParserState { stateParseRaw = parseRaw, stateTabStop = tabStop, @@ -728,26 +793,28 @@ main = do stateCitations = map citeKey refs, #endif stateSmart = smart || writerName' `elem` - ["latex", "context", "man"], + ["latex", "context", "latex+lhs", "man"], stateColumns = columns, stateStrict = strict, - stateIndentedCodeClasses = codeBlockClasses } + stateIndentedCodeClasses = codeBlockClasses, + stateApplyMacros = writerName' `notElem` ["latex", "latex+lhs"] } + let writerOptions = WriterOptions { writerStandalone = standalone', writerTemplate = if null template then defaultTemplate else template, writerVariables = variables'', - writerIncludeBefore = concat befores, - writerIncludeAfter = concat afters, + writerEPUBMetadata = epubMetadata, writerTabStop = tabStop, writerTableOfContents = toc && writerName' /= "s5", writerHTMLMathMethod = mathMethod, - writerS5 = (writerName' == "s5"), + writerSlideVariant = slideVariant, + writerIncremental = incremental, writerXeTeX = xetex, writerIgnoreNotes = False, - writerIncremental = incremental, writerNumberSections = numberSections, + writerSectionDivs = sectionDivs, writerStrictMarkdown = strict, writerReferenceLinks = referenceLinks, writerWrapText = wrap, @@ -756,23 +823,22 @@ main = do writerEmailObfuscation = if strict then ReferenceObfuscation else obfuscationMethod, - writerIdentifierPrefix = idPrefix } + writerIdentifierPrefix = idPrefix, + writerSourceDirectory = sourceDir, + writerUserDataDir = datadir } when (isNonTextOutput writerName' && outputFile == "-") $ do UTF8.hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++ "Specify an output file using the -o option.") exitWith $ ExitFailure 5 - let sourceDirRelative = if null sources - then "" - else takeDirectory (head sources) - let readSources [] = mapM readSource ["-"] readSources srcs = mapM readSource srcs readSource "-" = UTF8.getContents readSource src = case parseURI src of - Just u -> readURI u - Nothing -> UTF8.readFile src + Just u | uriScheme u `elem` ["http:","https:"] -> + readURI u + _ -> UTF8.readFile src readURI uri = simpleHTTP (mkRequest GET uri) >>= getResponseBody >>= return . toString -- treat all as UTF8 @@ -789,10 +855,8 @@ main = do return doc' #endif - let writerOutput = writer writerOptions doc'' ++ "\n" + writerOutput <- writer writerOptions doc'' - case writerName' of - "odt" -> saveOpenDocumentAsODT datadir outputFile sourceDirRelative referenceODT writerOutput - _ -> if outputFile == "-" - then UTF8.putStr writerOutput - else UTF8.writeFile outputFile writerOutput + if outputFile == "-" + then B.putStr writerOutput + else B.writeFile (encodeString outputFile) writerOutput |