diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Definition.hs | 18 | ||||
-rw-r--r-- | src/Text/Pandoc/ODT.hs | 102 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 698 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 14 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 106 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 176 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/TeXMath.hs | 250 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 735 | ||||
-rw-r--r-- | src/Text/Pandoc/Templates.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/UTF8.hs | 72 | ||||
-rw-r--r-- | src/Text/Pandoc/UUID.hs | 77 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/EPUB.hs | 283 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Man.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Markdown.hs | 25 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Native.hs | 86 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ODT.hs | 83 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 30 | ||||
-rw-r--r-- | src/markdown2pdf.hs | 26 | ||||
-rw-r--r-- | src/pandoc.hs | 139 |
23 files changed, 1659 insertions, 1286 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 9cad5fb34..2b6474b9f 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -71,6 +71,7 @@ module Text.Pandoc , NoteTable , HeaderType (..) -- * Writers: converting /from/ Pandoc format + , writeNative , writeMarkdown , writePlain , writeRST @@ -86,7 +87,8 @@ module Text.Pandoc , writeMan , writeMediaWiki , writeRTF - , prettyPandoc + , writeODT + , writeEPUB -- * Writer options used in writers , WriterOptions (..) , HTMLMathMethod (..) @@ -102,6 +104,7 @@ import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.RST import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.HTML +import Text.Pandoc.Writers.Native import Text.Pandoc.Writers.Markdown import Text.Pandoc.Writers.RST import Text.Pandoc.Writers.LaTeX @@ -109,12 +112,15 @@ import Text.Pandoc.Writers.ConTeXt import Text.Pandoc.Writers.Texinfo import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.S5 +import Text.Pandoc.Writers.ODT +import Text.Pandoc.Writers.EPUB import Text.Pandoc.Writers.Docbook import Text.Pandoc.Writers.OpenDocument import Text.Pandoc.Writers.Man import Text.Pandoc.Writers.RTF import Text.Pandoc.Writers.MediaWiki import Text.Pandoc.Templates +import Text.Pandoc.Parsing import Text.Pandoc.Shared import Data.Version (showVersion) import Paths_pandoc (version) diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs index c8ba9249b..fffca3b2e 100644 --- a/src/Text/Pandoc/Definition.hs +++ b/src/Text/Pandoc/Definition.hs @@ -33,19 +33,19 @@ module Text.Pandoc.Definition where import Data.Generics -data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show, Typeable, Data) +data Pandoc = Pandoc Meta [Block] deriving (Eq, Ord, Read, Show, Typeable, Data) -- | Bibliographic information for the document: title, authors, date. data Meta = Meta { docTitle :: [Inline] , docAuthors :: [[Inline]] , docDate :: [Inline] } - deriving (Eq, Show, Read, Typeable, Data) + deriving (Eq, Ord, Show, Read, Typeable, Data) -- | Alignment of a table column. data Alignment = AlignLeft | AlignRight | AlignCenter - | AlignDefault deriving (Eq, Show, Read, Typeable, Data) + | AlignDefault deriving (Eq, Ord, Show, Read, Typeable, Data) -- | List attributes. type ListAttributes = (Int, ListNumberStyle, ListNumberDelim) @@ -57,13 +57,13 @@ data ListNumberStyle = DefaultStyle | LowerRoman | UpperRoman | LowerAlpha - | UpperAlpha deriving (Eq, Show, Read, Typeable, Data) + | UpperAlpha deriving (Eq, Ord, Show, Read, Typeable, Data) -- | Delimiter of list numbers. data ListNumberDelim = DefaultDelim | Period | OneParen - | TwoParens deriving (Eq, Show, Read, Typeable, Data) + | TwoParens deriving (Eq, Ord, Show, Read, Typeable, Data) -- | Attributes: identifier, classes, key-value pairs type Attr = (String, [String], [(String, String)]) @@ -91,16 +91,16 @@ data Block -- column headers (each a list of blocks), and -- rows (each a list of lists of blocks) | Null -- ^ Nothing - deriving (Eq, Read, Show, Typeable, Data) + deriving (Eq, Ord, Read, Show, Typeable, Data) -- | Type of quotation marks to use in Quoted inline. -data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Read, Typeable, Data) +data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Ord, Read, Typeable, Data) -- | Link target (URL, title). type Target = (String, String) -- | Type of math element (display or inline). -data MathType = DisplayMath | InlineMath deriving (Show, Eq, Read, Typeable, Data) +data MathType = DisplayMath | InlineMath deriving (Show, Eq, Ord, Read, Typeable, Data) -- | Inline elements. data Inline @@ -127,7 +127,7 @@ data Inline | Image [Inline] Target -- ^ Image: alt text (list of inlines), target -- and target | Note [Block] -- ^ Footnote or endnote - deriving (Show, Eq, Read, Typeable, Data) + deriving (Show, Eq, Ord, Read, Typeable, Data) -- | Applies a transformation on @a@s to matching elements in a @b@. processWith :: (Data a, Data b) => (a -> a) -> b -> b diff --git a/src/Text/Pandoc/ODT.hs b/src/Text/Pandoc/ODT.hs deleted file mode 100644 index d978c0cb4..000000000 --- a/src/Text/Pandoc/ODT.hs +++ /dev/null @@ -1,102 +0,0 @@ -{- -Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.ODT - Copyright : Copyright (C) 2008-2010 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Functions for producing an ODT file from OpenDocument XML. --} -module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where -import Data.List ( find ) -import System.FilePath ( (</>), takeFileName ) -import qualified Data.ByteString.Lazy as B -import Data.ByteString.Lazy.UTF8 ( fromString ) -import Prelude hiding ( writeFile, readFile ) -import Codec.Archive.Zip -import Control.Applicative ( (<$>) ) -import Text.ParserCombinators.Parsec -import System.Time -import Paths_pandoc ( getDataFileName ) -import System.Directory -import Control.Monad (liftM) - --- | Produce an ODT file from OpenDocument XML. -saveOpenDocumentAsODT :: Maybe FilePath -- ^ Path of user data directory - -> FilePath -- ^ Pathname of ODT file to be produced - -> FilePath -- ^ Relative directory of source file - -> Maybe FilePath -- ^ Path specified by --reference-odt - -> String -- ^ OpenDocument XML contents - -> IO () -saveOpenDocumentAsODT datadir destinationODTPath sourceDirRelative mbRefOdt xml = do - refArchive <- liftM toArchive $ - case mbRefOdt of - Just f -> B.readFile f - Nothing -> do - let defaultODT = getDataFileName "reference.odt" >>= B.readFile - case datadir of - Nothing -> defaultODT - Just d -> do - exists <- doesFileExist (d </> "reference.odt") - if exists - then B.readFile (d </> "reference.odt") - else defaultODT - -- handle pictures - let (newContents, pics) = - case runParser pPictures [] "OpenDocument XML contents" xml of - Left err -> error $ show err - Right x -> x - picEntries <- mapM (makePictureEntry sourceDirRelative) pics - (TOD epochTime _) <- getClockTime - let contentEntry = toEntry "content.xml" epochTime $ fromString newContents - let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries) - B.writeFile destinationODTPath $ fromArchive archive - -makePictureEntry :: FilePath -- ^ Relative directory of source file - -> (FilePath, String) -- ^ Path and new path of picture - -> IO Entry -makePictureEntry sourceDirRelative (path, newPath) = do - entry <- readEntry [] $ sourceDirRelative </> path - return (entry { eRelativePath = newPath }) - -pPictures :: GenParser Char [(FilePath, String)] ([Char], [(FilePath, String)]) -pPictures = do - contents <- concat <$> many (pPicture <|> many1 (noneOf "<") <|> string "<") - pics <- getState - return (contents, pics) - -pPicture :: GenParser Char [(FilePath, String)] [Char] -pPicture = try $ do - string "<draw:image xlink:href=\"" - path <- manyTill anyChar (char '"') - let filename = takeFileName path - pics <- getState - newPath <- case find (\(o, _) -> o == path) pics of - Just (_, new) -> return new - Nothing -> do - -- get a unique name - let dups = length $ (filter (\(o, _) -> takeFileName o == filename)) pics - let new = "Pictures/" ++ replicate dups '0' ++ filename - updateState ((path, new) :) - return new - return $ "<draw:image xlink:href=\"" ++ newPath ++ "\"" diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs new file mode 100644 index 000000000..19338661d --- /dev/null +++ b/src/Text/Pandoc/Parsing.hs @@ -0,0 +1,698 @@ +{- +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Parsing + Copyright : Copyright (C) 2006-2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +A utility library with parsers used in pandoc readers. +-} +module Text.Pandoc.Parsing ( (>>~), + anyLine, + many1Till, + notFollowedBy', + oneOfStrings, + spaceChar, + skipSpaces, + blankline, + blanklines, + enclosed, + stringAnyCase, + parseFromString, + lineClump, + charsInBalanced, + charsInBalanced', + romanNumeral, + emailAddress, + uri, + withHorizDisplacement, + nullBlock, + failIfStrict, + failUnlessLHS, + escaped, + anyOrderedListMarker, + orderedListMarker, + charRef, + tableWith, + gridTableWith, + readWith, + testStringWith, + ParserState (..), + defaultParserState, + HeaderType (..), + ParserContext (..), + QuoteContext (..), + NoteTable, + KeyTable, + Key (..), + lookupKeySrc, + refsMatch ) +where + +import Text.Pandoc.Definition +import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) +import Text.ParserCombinators.Parsec +import Text.Pandoc.CharacterReferences ( characterReference ) +import Data.Char ( toLower, toUpper, ord, isAscii ) +import Data.List ( intercalate, transpose ) +import Network.URI ( parseURI, URI (..), isAllowedInURI ) +import Control.Monad ( join, liftM ) +import Text.Pandoc.Shared +import qualified Data.Map as M + +-- | Like >>, but returns the operation on the left. +-- (Suggested by Tillmann Rendel on Haskell-cafe list.) +(>>~) :: (Monad m) => m a -> m b -> m a +a >>~ b = a >>= \x -> b >> return x + +-- | Parse any line of text +anyLine :: GenParser Char st [Char] +anyLine = manyTill anyChar newline + +-- | Like @manyTill@, but reads at least one item. +many1Till :: GenParser tok st a + -> GenParser tok st end + -> GenParser tok st [a] +many1Till p end = do + first <- p + rest <- manyTill p end + return (first:rest) + +-- | A more general form of @notFollowedBy@. This one allows any +-- type of parser to be specified, and succeeds only if that parser fails. +-- It does not consume any input. +notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () +notFollowedBy' p = try $ join $ do a <- try p + return (unexpected (show a)) + <|> + return (return ()) +-- (This version due to Andrew Pimlott on the Haskell mailing list.) + +-- | Parses one of a list of strings (tried in order). +oneOfStrings :: [String] -> GenParser Char st String +oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings + +-- | Parses a space or tab. +spaceChar :: CharParser st Char +spaceChar = char ' ' <|> char '\t' + +-- | Skips zero or more spaces or tabs. +skipSpaces :: GenParser Char st () +skipSpaces = skipMany spaceChar + +-- | Skips zero or more spaces or tabs, then reads a newline. +blankline :: GenParser Char st Char +blankline = try $ skipSpaces >> newline + +-- | Parses one or more blank lines and returns a string of newlines. +blanklines :: GenParser Char st [Char] +blanklines = many1 blankline + +-- | Parses material enclosed between start and end parsers. +enclosed :: GenParser Char st t -- ^ start parser + -> GenParser Char st end -- ^ end parser + -> GenParser Char st a -- ^ content parser (to be used repeatedly) + -> GenParser Char st [a] +enclosed start end parser = try $ + start >> notFollowedBy space >> many1Till parser end + +-- | Parse string, case insensitive. +stringAnyCase :: [Char] -> CharParser st String +stringAnyCase [] = string "" +stringAnyCase (x:xs) = do + firstChar <- char (toUpper x) <|> char (toLower x) + rest <- stringAnyCase xs + return (firstChar:rest) + +-- | Parse contents of 'str' using 'parser' and return result. +parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a +parseFromString parser str = do + oldPos <- getPosition + oldInput <- getInput + setInput str + result <- parser + setInput oldInput + setPosition oldPos + return result + +-- | Parse raw line block up to and including blank lines. +lineClump :: GenParser Char st String +lineClump = blanklines + <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) + +-- | Parse a string of characters between an open character +-- and a close character, including text between balanced +-- pairs of open and close, which must be different. For example, +-- @charsInBalanced '(' ')'@ will parse "(hello (there))" +-- and return "hello (there)". Stop if a blank line is +-- encountered. +charsInBalanced :: Char -> Char -> GenParser Char st String +charsInBalanced open close = try $ do + char open + raw <- many $ (many1 (noneOf [open, close, '\n'])) + <|> (do res <- charsInBalanced open close + return $ [open] ++ res ++ [close]) + <|> try (string "\n" >>~ notFollowedBy' blanklines) + char close + return $ concat raw + +-- | Like @charsInBalanced@, but allow blank lines in the content. +charsInBalanced' :: Char -> Char -> GenParser Char st String +charsInBalanced' open close = try $ do + char open + raw <- many $ (many1 (noneOf [open, close])) + <|> (do res <- charsInBalanced' open close + return $ [open] ++ res ++ [close]) + char close + return $ concat raw + +-- Auxiliary functions for romanNumeral: + +lowercaseRomanDigits :: [Char] +lowercaseRomanDigits = ['i','v','x','l','c','d','m'] + +uppercaseRomanDigits :: [Char] +uppercaseRomanDigits = map toUpper lowercaseRomanDigits + +-- | Parses a roman numeral (uppercase or lowercase), returns number. +romanNumeral :: Bool -- ^ Uppercase if true + -> GenParser Char st Int +romanNumeral upperCase = do + let romanDigits = if upperCase + then uppercaseRomanDigits + else lowercaseRomanDigits + lookAhead $ oneOf romanDigits + let [one, five, ten, fifty, hundred, fivehundred, thousand] = + map char romanDigits + thousands <- many thousand >>= (return . (1000 *) . length) + ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 + fivehundreds <- many fivehundred >>= (return . (500 *) . length) + fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400 + hundreds <- many hundred >>= (return . (100 *) . length) + nineties <- option 0 $ try $ ten >> hundred >> return 90 + fifties <- many fifty >>= (return . (50 *) . length) + forties <- option 0 $ try $ ten >> fifty >> return 40 + tens <- many ten >>= (return . (10 *) . length) + nines <- option 0 $ try $ one >> ten >> return 9 + fives <- many five >>= (return . (5 *) . length) + fours <- option 0 $ try $ one >> five >> return 4 + ones <- many one >>= (return . length) + let total = thousands + ninehundreds + fivehundreds + fourhundreds + + hundreds + nineties + fifties + forties + tens + nines + + fives + fours + ones + if total == 0 + then fail "not a roman numeral" + else return total + +-- Parsers for email addresses and URIs + +emailChar :: GenParser Char st Char +emailChar = alphaNum <|> oneOf "-+_." + +domainChar :: GenParser Char st Char +domainChar = alphaNum <|> char '-' + +domain :: GenParser Char st [Char] +domain = do + first <- many1 domainChar + dom <- many1 $ try (char '.' >> many1 domainChar ) + return $ intercalate "." (first:dom) + +-- | Parses an email address; returns original and corresponding +-- escaped mailto: URI. +emailAddress :: GenParser Char st (String, String) +emailAddress = try $ do + firstLetter <- alphaNum + restAddr <- many emailChar + let addr = firstLetter:restAddr + char '@' + dom <- domain + let full = addr ++ '@':dom + return (full, escapeURI $ "mailto:" ++ full) + +-- | Parses a URI. Returns pair of original and URI-escaped version. +uri :: GenParser Char st (String, String) +uri = try $ do + let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:", + "news:", "telnet:" ] + lookAhead $ oneOfStrings protocols + -- scan non-ascii characters and ascii characters allowed in a URI + str <- many1 $ satisfy (\c -> not (isAscii c) || isAllowedInURI c) + -- now see if they amount to an absolute URI + case parseURI (escapeURI str) of + Just uri' -> if uriScheme uri' `elem` protocols + then return (str, show uri') + else fail "not a URI" + Nothing -> fail "not a URI" + +-- | Applies a parser, returns tuple of its results and its horizontal +-- displacement (the difference between the source column at the end +-- and the source column at the beginning). Vertical displacement +-- (source row) is ignored. +withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply + -> GenParser Char st (a, Int) -- ^ (result, displacement) +withHorizDisplacement parser = do + pos1 <- getPosition + result <- parser + pos2 <- getPosition + return (result, sourceColumn pos2 - sourceColumn pos1) + +-- | Parses a character and returns 'Null' (so that the parser can move on +-- if it gets stuck). +nullBlock :: GenParser Char st Block +nullBlock = anyChar >> return Null + +-- | Fail if reader is in strict markdown syntax mode. +failIfStrict :: GenParser Char ParserState () +failIfStrict = do + state <- getState + if stateStrict state then fail "strict mode" else return () + +-- | Fail unless we're in literate haskell mode. +failUnlessLHS :: GenParser tok ParserState () +failUnlessLHS = do + state <- getState + if stateLiterateHaskell state then return () else fail "Literate haskell feature" + +-- | Parses backslash, then applies character parser. +escaped :: GenParser Char st Char -- ^ Parser for character to escape + -> GenParser Char st Inline +escaped parser = try $ do + char '\\' + result <- parser + return (Str [result]) + +-- | Parses an uppercase roman numeral and returns (UpperRoman, number). +upperRoman :: GenParser Char st (ListNumberStyle, Int) +upperRoman = do + num <- romanNumeral True + return (UpperRoman, num) + +-- | Parses a lowercase roman numeral and returns (LowerRoman, number). +lowerRoman :: GenParser Char st (ListNumberStyle, Int) +lowerRoman = do + num <- romanNumeral False + return (LowerRoman, num) + +-- | Parses a decimal numeral and returns (Decimal, number). +decimal :: GenParser Char st (ListNumberStyle, Int) +decimal = do + num <- many1 digit + return (Decimal, read num) + +-- | Parses a '@' and optional label and +-- returns (DefaultStyle, [next example number]). The next +-- example number is incremented in parser state, and the label +-- (if present) is added to the label table. +exampleNum :: GenParser Char ParserState (ListNumberStyle, Int) +exampleNum = do + char '@' + lab <- many (alphaNum <|> oneOf "_-") + st <- getState + let num = stateNextExample st + let newlabels = if null lab + then stateExamples st + else M.insert lab num $ stateExamples st + updateState $ \s -> s{ stateNextExample = num + 1 + , stateExamples = newlabels } + return (Example, num) + +-- | Parses a '#' returns (DefaultStyle, 1). +defaultNum :: GenParser Char st (ListNumberStyle, Int) +defaultNum = do + char '#' + return (DefaultStyle, 1) + +-- | Parses a lowercase letter and returns (LowerAlpha, number). +lowerAlpha :: GenParser Char st (ListNumberStyle, Int) +lowerAlpha = do + ch <- oneOf ['a'..'z'] + return (LowerAlpha, ord ch - ord 'a' + 1) + +-- | Parses an uppercase letter and returns (UpperAlpha, number). +upperAlpha :: GenParser Char st (ListNumberStyle, Int) +upperAlpha = do + ch <- oneOf ['A'..'Z'] + return (UpperAlpha, ord ch - ord 'A' + 1) + +-- | Parses a roman numeral i or I +romanOne :: GenParser Char st (ListNumberStyle, Int) +romanOne = (char 'i' >> return (LowerRoman, 1)) <|> + (char 'I' >> return (UpperRoman, 1)) + +-- | Parses an ordered list marker and returns list attributes. +anyOrderedListMarker :: GenParser Char ParserState ListAttributes +anyOrderedListMarker = choice $ + [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], + numParser <- [decimal, exampleNum, defaultNum, romanOne, + lowerAlpha, lowerRoman, upperAlpha, upperRoman]] + +-- | Parses a list number (num) followed by a period, returns list attributes. +inPeriod :: GenParser Char st (ListNumberStyle, Int) + -> GenParser Char st ListAttributes +inPeriod num = try $ do + (style, start) <- num + char '.' + let delim = if style == DefaultStyle + then DefaultDelim + else Period + return (start, style, delim) + +-- | Parses a list number (num) followed by a paren, returns list attributes. +inOneParen :: GenParser Char st (ListNumberStyle, Int) + -> GenParser Char st ListAttributes +inOneParen num = try $ do + (style, start) <- num + char ')' + return (start, style, OneParen) + +-- | Parses a list number (num) enclosed in parens, returns list attributes. +inTwoParens :: GenParser Char st (ListNumberStyle, Int) + -> GenParser Char st ListAttributes +inTwoParens num = try $ do + char '(' + (style, start) <- num + char ')' + return (start, style, TwoParens) + +-- | Parses an ordered list marker with a given style and delimiter, +-- returns number. +orderedListMarker :: ListNumberStyle + -> ListNumberDelim + -> GenParser Char ParserState Int +orderedListMarker style delim = do + let num = defaultNum <|> -- # can continue any kind of list + case style of + DefaultStyle -> decimal + Example -> exampleNum + Decimal -> decimal + UpperRoman -> upperRoman + LowerRoman -> lowerRoman + UpperAlpha -> upperAlpha + LowerAlpha -> lowerAlpha + let context = case delim of + DefaultDelim -> inPeriod + Period -> inPeriod + OneParen -> inOneParen + TwoParens -> inTwoParens + (start, _, _) <- context num + return start + +-- | Parses a character reference and returns a Str element. +charRef :: GenParser Char st Inline +charRef = do + c <- characterReference + return $ Str [c] + +-- | Parse a table using 'headerParser', 'rowParser', +-- 'lineParser', and 'footerParser'. +tableWith :: GenParser Char ParserState ([[Block]], [Alignment], [Int]) + -> ([Int] -> GenParser Char ParserState [[Block]]) + -> GenParser Char ParserState sep + -> GenParser Char ParserState end + -> GenParser Char ParserState [Inline] + -> GenParser Char ParserState Block +tableWith headerParser rowParser lineParser footerParser captionParser = try $ do + caption' <- option [] captionParser + (heads, aligns, indices) <- headerParser + lines' <- rowParser indices `sepEndBy` lineParser + footerParser + caption <- if null caption' + then option [] captionParser + else return caption' + state <- getState + let numColumns = stateColumns state + let widths = widthsFromIndices numColumns indices + return $ Table caption aligns widths heads lines' + +-- Calculate relative widths of table columns, based on indices +widthsFromIndices :: Int -- Number of columns on terminal + -> [Int] -- Indices + -> [Double] -- Fractional relative sizes of columns +widthsFromIndices _ [] = [] +widthsFromIndices numColumns indices = + let lengths' = zipWith (-) indices (0:indices) + lengths = reverse $ + case reverse lengths' of + [] -> [] + [x] -> [x] + -- compensate for the fact that intercolumn + -- spaces are counted in widths of all columns + -- but the last... + (x:y:zs) -> if x < y && y - x <= 2 + then y:y:zs + else x:y:zs + totLength = sum lengths + quotient = if totLength > numColumns + then fromIntegral totLength + else fromIntegral numColumns + fracs = map (\l -> (fromIntegral l) / quotient) lengths in + tail fracs + +-- Parse a grid table: starts with row of '-' on top, then header +-- (which may be grid), then the rows, +-- which may be grid, separated by blank lines, and +-- ending with a footer (dashed line followed by blank line). +gridTableWith :: GenParser Char ParserState Block -- ^ Block parser + -> GenParser Char ParserState [Inline] -- ^ Caption parser + -> Bool -- ^ Headerless table + -> GenParser Char ParserState Block +gridTableWith block tableCaption headless = + tableWith (gridTableHeader headless block) (gridTableRow block) (gridTableSep '-') gridTableFooter tableCaption + +gridTableSplitLine :: [Int] -> String -> [String] +gridTableSplitLine indices line = + map removeFinalBar $ tail $ splitByIndices (init indices) line + +gridPart :: Char -> GenParser Char st (Int, Int) +gridPart ch = do + dashes <- many1 (char ch) + char '+' + return (length dashes, length dashes + 1) + +gridDashedLines :: Char -> GenParser Char st [(Int,Int)] +gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline + +removeFinalBar :: String -> String +removeFinalBar = reverse . dropWhile (=='|') . dropWhile (`elem` " \t") . + reverse + +-- | Separator between rows of grid table. +gridTableSep :: Char -> GenParser Char ParserState Char +gridTableSep ch = try $ gridDashedLines ch >> return '\n' + +-- | Parse header for a grid table. +gridTableHeader :: Bool -- ^ Headerless table + -> GenParser Char ParserState Block + -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) +gridTableHeader headless block = try $ do + optional blanklines + dashes <- gridDashedLines '-' + rawContent <- if headless + then return $ repeat "" + else many1 + (notFollowedBy (gridTableSep '=') >> char '|' >> + many1Till anyChar newline) + if headless + then return () + else gridTableSep '=' >> return () + let lines' = map snd dashes + let indices = scanl (+) 0 lines' + let aligns = replicate (length lines') AlignDefault + -- RST does not have a notion of alignments + let rawHeads = if headless + then replicate (length dashes) "" + else map (intercalate " ") $ transpose + $ map (gridTableSplitLine indices) rawContent + heads <- mapM (parseFromString $ many block) $ + map removeLeadingTrailingSpace rawHeads + return (heads, aligns, indices) + +gridTableRawLine :: [Int] -> GenParser Char ParserState [String] +gridTableRawLine indices = do + char '|' + line <- many1Till anyChar newline + return (gridTableSplitLine indices $ removeTrailingSpace line) + +-- | Parse row of grid table. +gridTableRow :: GenParser Char ParserState Block + -> [Int] + -> GenParser Char ParserState [[Block]] +gridTableRow block indices = do + colLines <- many1 (gridTableRawLine indices) + let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ + transpose colLines + mapM (liftM compactifyCell . parseFromString (many block)) cols + +removeOneLeadingSpace :: [String] -> [String] +removeOneLeadingSpace xs = + if all startsWithSpace xs + then map (drop 1) xs + else xs + where startsWithSpace "" = True + startsWithSpace (y:_) = y == ' ' + +compactifyCell :: [Block] -> [Block] +compactifyCell bs = head $ compactify [bs] + +-- | Parse footer for a grid table. +gridTableFooter :: GenParser Char ParserState [Char] +gridTableFooter = blanklines + +--- + +-- | Parse a string with a given parser and state. +readWith :: GenParser Char ParserState a -- ^ parser + -> ParserState -- ^ initial state + -> String -- ^ input string + -> a +readWith parser state input = + case runParser parser state "source" input of + Left err -> error $ "\nError:\n" ++ show err + Right result -> result + +-- | Parse a string with @parser@ (for testing). +testStringWith :: (Show a) => GenParser Char ParserState a + -> String + -> IO () +testStringWith parser str = UTF8.putStrLn $ show $ + readWith parser defaultParserState str + +-- | Parsing options. +data ParserState = ParserState + { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? + stateParserContext :: ParserContext, -- ^ Inside list? + stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? + stateSanitizeHTML :: Bool, -- ^ Sanitize HTML? + stateKeys :: KeyTable, -- ^ List of reference keys +#ifdef _CITEPROC + stateCitations :: [String], -- ^ List of available citations +#endif + stateNotes :: NoteTable, -- ^ List of notes + stateTabStop :: Int, -- ^ Tab stop + stateStandalone :: Bool, -- ^ Parse bibliographic info? + stateTitle :: [Inline], -- ^ Title of document + stateAuthors :: [[Inline]], -- ^ Authors of document + stateDate :: [Inline], -- ^ Date of document + stateStrict :: Bool, -- ^ Use strict markdown syntax? + stateSmart :: Bool, -- ^ Use smart typography? + stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell + stateColumns :: Int, -- ^ Number of columns in terminal + stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used + stateIndentedCodeClasses :: [String], -- ^ Classes to use for indented code blocks + stateNextExample :: Int, -- ^ Number of next example + stateExamples :: M.Map String Int -- ^ Map from example labels to numbers + } + deriving Show + +defaultParserState :: ParserState +defaultParserState = + ParserState { stateParseRaw = False, + stateParserContext = NullState, + stateQuoteContext = NoQuote, + stateSanitizeHTML = False, + stateKeys = M.empty, +#ifdef _CITEPROC + stateCitations = [], +#endif + stateNotes = [], + stateTabStop = 4, + stateStandalone = False, + stateTitle = [], + stateAuthors = [], + stateDate = [], + stateStrict = False, + stateSmart = False, + stateLiterateHaskell = False, + stateColumns = 80, + stateHeaderTable = [], + stateIndentedCodeClasses = [], + stateNextExample = 1, + stateExamples = M.empty } + +data HeaderType + = SingleHeader Char -- ^ Single line of characters underneath + | DoubleHeader Char -- ^ Lines of characters above and below + deriving (Eq, Show) + +data ParserContext + = ListItemState -- ^ Used when running parser on list item contents + | NullState -- ^ Default state + deriving (Eq, Show) + +data QuoteContext + = InSingleQuote -- ^ Used when parsing inside single quotes + | InDoubleQuote -- ^ Used when parsing inside double quotes + | NoQuote -- ^ Used when not parsing inside quotes + deriving (Eq, Show) + +type NoteTable = [(String, String)] + +newtype Key = Key [Inline] deriving (Show, Read) + +instance Eq Key where + Key a == Key b = refsMatch a b + +instance Ord Key where + compare (Key a) (Key b) = if a == b then EQ else compare a b + +type KeyTable = M.Map Key Target + +-- | Look up key in key table and return target object. +lookupKeySrc :: KeyTable -- ^ Key table + -> Key -- ^ Key + -> Maybe Target +lookupKeySrc table key = case M.lookup key table of + Nothing -> Nothing + Just src -> Just src + +-- | Returns @True@ if keys match (case insensitive). +refsMatch :: [Inline] -> [Inline] -> Bool +refsMatch ((Str x):restx) ((Str y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((Emph x):restx) ((Emph y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Strong x):restx) ((Strong y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Strikeout x):restx) ((Strikeout y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Superscript x):restx) ((Superscript y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Subscript x):restx) ((Subscript y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((SmallCaps x):restx) ((SmallCaps y):resty) = + refsMatch x y && refsMatch restx resty +refsMatch ((Quoted t x):restx) ((Quoted u y):resty) = + t == u && refsMatch x y && refsMatch restx resty +refsMatch ((Code x):restx) ((Code y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((Math t x):restx) ((Math u y):resty) = + ((map toLower x) == (map toLower y)) && t == u && refsMatch restx resty +refsMatch ((TeX x):restx) ((TeX y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = + ((map toLower x) == (map toLower y)) && refsMatch restx resty +refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty +refsMatch [] x = null x +refsMatch x [] = null x + diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 5c188e3d9..6d54e7349 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -44,7 +44,8 @@ module Text.Pandoc.Readers.HTML ( import Text.ParserCombinators.Parsec import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Parsing import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Data.Maybe ( fromMaybe ) import Data.List ( isPrefixOf, isSuffixOf, intercalate ) diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 36940fab0..bbc5bb872 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -35,7 +35,8 @@ module Text.Pandoc.Readers.LaTeX ( import Text.ParserCombinators.Parsec import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Parsing import Data.Maybe ( fromMaybe ) import Data.Char ( chr ) import Data.List ( isPrefixOf, isSuffixOf ) @@ -453,7 +454,7 @@ inline = choice [ str , accentedChar , nonbreakingSpace , specialChar - , rawLaTeXInline + , rawLaTeXInline' , escapedChar , unescapedChar ] <?> "inline" @@ -771,11 +772,16 @@ footnote = try $ do setInput rest return $ Note blocks +-- | Parse any LaTeX inline command and return it in a raw TeX inline element. +rawLaTeXInline' :: GenParser Char ParserState Inline +rawLaTeXInline' = do + notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore", + "\\section"] + rawLaTeXInline + -- | Parse any LaTeX command and return it in a raw TeX inline element. rawLaTeXInline :: GenParser Char ParserState Inline rawLaTeXInline = try $ do - notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore", - "\\section"] state <- getState if stateParseRaw state then do diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 0d3b30d10..086f85bb4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -32,12 +32,13 @@ module Text.Pandoc.Readers.Markdown ( ) where import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate ) +import qualified Data.Map as M import Data.Ord ( comparing ) import Data.Char ( isAlphaNum ) import Data.Maybe -import qualified Data.Map as M import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Parsing import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' ) import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, anyHtmlInlineTag, anyHtmlTag, @@ -68,7 +69,7 @@ setextHChars = "=-" -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\\[]*_~`<>$!^-.&@'\"\8216\8217\8220\8221;" +specialChars = "\\[]*_~`<>$!^-.&@'\";" -- -- auxiliary functions @@ -203,10 +204,10 @@ referenceKey = try $ do tit <- option "" referenceTitle blanklines endPos <- getPosition - let newkey = (lab, (escapeURI $ removeTrailingSpace src, tit)) + let target = (escapeURI $ removeTrailingSpace src, tit) st <- getState let oldkeys = stateKeys st - updateState $ \s -> s { stateKeys = newkey : oldkeys } + updateState $ \s -> s { stateKeys = M.insert (Key lab) target oldkeys } -- return blanks so line count isn't affected return $ replicate (sourceLine endPos - sourceLine startPos) '\n' @@ -716,7 +717,7 @@ dashedLine ch = do -- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. simpleTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Char]], [Alignment], [Int]) + -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) simpleTableHeader headless = try $ do rawContent <- if headless then return "" @@ -735,7 +736,9 @@ simpleTableHeader headless = try $ do let rawHeads' = if headless then replicate (length dashes) "" else rawHeads - return (rawHeads', aligns, indices) + heads <- mapM (parseFromString (many plain)) $ + map removeLeadingTrailingSpace rawHeads' + return (heads, aligns, indices) -- Parse a table footer - dashed lines followed by blank line. tableFooter :: GenParser Char ParserState [Char] @@ -764,65 +767,27 @@ multilineRow :: [Int] -> GenParser Char ParserState [[Block]] multilineRow indices = do colLines <- many1 (rawTableLine indices) - optional blanklines let cols = map unlines $ transpose colLines mapM (parseFromString (many plain)) cols --- Calculate relative widths of table columns, based on indices -widthsFromIndices :: Int -- Number of columns on terminal - -> [Int] -- Indices - -> [Double] -- Fractional relative sizes of columns -widthsFromIndices _ [] = [] -widthsFromIndices numColumns indices = - let lengths' = zipWith (-) indices (0:indices) - lengths = reverse $ - case reverse lengths' of - [] -> [] - [x] -> [x] - -- compensate for the fact that intercolumn - -- spaces are counted in widths of all columns - -- but the last... - (x:y:zs) -> if x < y && y - x <= 2 - then y:y:zs - else x:y:zs - totLength = sum lengths - quotient = if totLength > numColumns - then fromIntegral totLength - else fromIntegral numColumns - fracs = map (\l -> (fromIntegral l) / quotient) lengths in - tail fracs - -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. tableCaption :: GenParser Char ParserState [Inline] tableCaption = try $ do skipNonindentSpaces - string "Table:" + string ":" <|> string "Table:" result <- many1 inline blanklines return $ normalizeSpaces result --- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. -tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int]) - -> ([Int] -> GenParser Char ParserState [[Block]]) - -> GenParser Char ParserState end - -> GenParser Char ParserState Block -tableWith headerParser lineParser footerParser = try $ do - (rawHeads, aligns, indices) <- headerParser - lines' <- many1Till (lineParser indices) footerParser - caption <- option [] tableCaption - heads <- mapM (parseFromString (many plain)) rawHeads - state <- getState - let numColumns = stateColumns state - let widths = widthsFromIndices numColumns indices - return $ Table caption aligns widths heads lines' - -- Parse a simple table with '---' header and one line per row. simpleTable :: Bool -- ^ Headerless table -> GenParser Char ParserState Block simpleTable headless = do Table c a _w h l <- tableWith (simpleTableHeader headless) tableLine - (if headless then tableFooter else tableFooter <|> blanklines) + (return ()) + (if headless then tableFooter else tableFooter <|> blanklines) + tableCaption -- Simple tables get 0s for relative column widths (i.e., use default) return $ Table c a (replicate (length a) 0) h l @@ -833,10 +798,10 @@ simpleTable headless = do multilineTable :: Bool -- ^ Headerless table -> GenParser Char ParserState Block multilineTable headless = - tableWith (multilineTableHeader headless) multilineRow tableFooter + tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter tableCaption multilineTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([String], [Alignment], [Int]) + -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) multilineTableHeader headless = try $ do if headless then return '\n' @@ -860,7 +825,9 @@ multilineTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else map (intercalate " ") rawHeadsList - return ((map removeLeadingTrailingSpace rawHeads), aligns, indices) + heads <- mapM (parseFromString (many plain)) $ + map removeLeadingTrailingSpace rawHeads + return (heads, aligns, indices) -- Returns an alignment type for a table, based on a list of strings -- (the rows of the column header) and a number (the length of the @@ -880,9 +847,14 @@ alignType strLst len = (True, True) -> AlignCenter (False, False) -> AlignDefault +gridTable :: Bool -- ^ Headerless table + -> GenParser Char ParserState Block +gridTable = gridTableWith block tableCaption + table :: GenParser Char ParserState Block table = multilineTable False <|> simpleTable True <|> - simpleTable False <|> multilineTable True <?> "table" + simpleTable False <|> multilineTable True <|> + gridTable False <|> gridTable True <?> "table" -- -- inline @@ -1081,30 +1053,28 @@ failIfInQuoteContext context = do singleQuoteStart :: GenParser Char ParserState Char singleQuoteStart = do failIfInQuoteContext InSingleQuote - char '\8216' <|> - (try $ do char '\'' - notFollowedBy (oneOf ")!],.;:-? \t\n") - notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> - satisfy (not . isAlphaNum))) - -- possess/contraction - return '\'') + try $ do char '\'' + notFollowedBy (oneOf ")!],.;:-? \t\n") + notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> + satisfy (not . isAlphaNum))) + -- possess/contraction + return '\'' singleQuoteEnd :: GenParser Char st Char singleQuoteEnd = try $ do - char '\8217' <|> char '\'' + char '\'' notFollowedBy alphaNum return '\'' doubleQuoteStart :: GenParser Char ParserState Char doubleQuoteStart = do failIfInQuoteContext InDoubleQuote - char '\8220' <|> - (try $ do char '"' - notFollowedBy (oneOf " \t\n") - return '"') + try $ do char '"' + notFollowedBy (oneOf " \t\n") + return '"' doubleQuoteEnd :: GenParser Char st Char -doubleQuoteEnd = char '\8221' <|> char '"' +doubleQuoteEnd = char '"' ellipses :: GenParser Char st Inline ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses @@ -1229,7 +1199,7 @@ referenceLink lab = do optional (newline >> skipSpaces) >> reference)) let ref' = if null ref then lab else ref state <- getState - case lookupKeySrc (stateKeys state) ref' of + case lookupKeySrc (stateKeys state) (Key ref') of Nothing -> fail "no corresponding key" Just target -> return target @@ -1314,7 +1284,7 @@ inlineCitation = try $ do chkCit :: Target -> GenParser Char ParserState (Maybe Target) chkCit t = do st <- getState - case lookupKeySrc (stateKeys st) [Str $ fst t] of + case lookupKeySrc (stateKeys st) (Key [Str $ fst t]) of Just _ -> fail "This is a link" Nothing -> if elem (fst t) $ stateCitations st then return $ Just t diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index c293c4fcd..13afe5053 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -31,10 +31,13 @@ module Text.Pandoc.Readers.RST ( readRST ) where import Text.Pandoc.Definition -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Parsing import Text.ParserCombinators.Parsec -import Control.Monad ( when, unless, liftM ) -import Data.List ( findIndex, delete, intercalate, transpose ) +import Control.Monad ( when, unless ) +import Data.List ( findIndex, intercalate, transpose, sort ) +import qualified Data.Map as M +import Text.Printf ( printf ) -- | Parse reStructuredText string and return Pandoc document. readRST :: ParserState -- ^ Parser state, including options for parser @@ -93,9 +96,6 @@ parseRST = do docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat setInput docMinusKeys setPosition startPos - st <- getState - let reversedKeys = stateKeys st - updateState $ \s -> s { stateKeys = reverse reversedKeys } -- now parse it for real... blocks <- parseBlocks let blocks' = filter (/= Null) blocks @@ -540,10 +540,10 @@ referenceName = quotedReferenceName <|> referenceKey :: GenParser Char ParserState [Char] referenceKey = do startPos <- getPosition - key <- choice [imageKey, anonymousKey, regularKey] + (key, target) <- choice [imageKey, anonymousKey, regularKey] st <- getState let oldkeys = stateKeys st - updateState $ \s -> s { stateKeys = key : oldkeys } + updateState $ \s -> s { stateKeys = M.insert key target oldkeys } optional blanklines endPos <- getPosition -- return enough blanks to replace key @@ -558,28 +558,29 @@ targetURI = do blanklines return $ escapeURI $ removeLeadingTrailingSpace $ contents -imageKey :: GenParser Char ParserState ([Inline], (String, [Char])) +imageKey :: GenParser Char ParserState (Key, Target) imageKey = try $ do string ".. |" ref <- manyTill inline (char '|') skipSpaces string "image::" src <- targetURI - return (normalizeSpaces ref, (src, "")) + return (Key (normalizeSpaces ref), (src, "")) -anonymousKey :: GenParser Char st ([Inline], (String, [Char])) +anonymousKey :: GenParser Char st (Key, Target) anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI - return ([Str "_"], (src, "")) + pos <- getPosition + return (Key [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, "")) -regularKey :: GenParser Char ParserState ([Inline], (String, [Char])) +regularKey :: GenParser Char ParserState (Key, Target) regularKey = try $ do string ".. _" ref <- referenceName char ':' src <- targetURI - return (normalizeSpaces ref, (src, "")) + return (Key (normalizeSpaces ref), (src, "")) -- -- tables @@ -607,41 +608,20 @@ dashedLine ch = do simpleDashedLines :: Char -> GenParser Char st [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -gridPart :: Char -> GenParser Char st (Int, Int) -gridPart ch = do - dashes <- many1 (char ch) - char '+' - return (length dashes, length dashes + 1) - -gridDashedLines :: Char -> GenParser Char st [(Int,Int)] -gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline - -- Parse a table row separator simpleTableSep :: Char -> GenParser Char ParserState Char simpleTableSep ch = try $ simpleDashedLines ch >> newline -gridTableSep :: Char -> GenParser Char ParserState Char -gridTableSep ch = try $ gridDashedLines ch >> return '\n' - -- Parse a table footer simpleTableFooter :: GenParser Char ParserState [Char] simpleTableFooter = try $ simpleTableSep '=' >> blanklines -gridTableFooter :: GenParser Char ParserState [Char] -gridTableFooter = blanklines - -- Parse a raw line and split it into chunks by indices. simpleTableRawLine :: [Int] -> GenParser Char ParserState [String] simpleTableRawLine indices = do line <- many1Till anyChar newline return (simpleTableSplitLine indices line) -gridTableRawLine :: [Int] -> GenParser Char ParserState [String] -gridTableRawLine indices = do - char '|' - line <- many1Till anyChar newline - return (gridTableSplitLine indices $ removeTrailingSpace line) - -- Parse a table row and return a list of blocks (columns). simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]] simpleTableRow indices = do @@ -651,64 +631,13 @@ simpleTableRow indices = do let cols = map unlines . transpose $ firstLine : colLines mapM (parseFromString (many plain)) cols -gridTableRow :: [Int] - -> GenParser Char ParserState [[Block]] -gridTableRow indices = do - colLines <- many1 (gridTableRawLine indices) - let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ - transpose colLines - mapM (liftM compactifyCell . parseFromString (many block)) cols - -compactifyCell :: [Block] -> [Block] -compactifyCell bs = head $ compactify [bs] - simpleTableSplitLine :: [Int] -> String -> [String] simpleTableSplitLine indices line = map removeLeadingTrailingSpace $ tail $ splitByIndices (init indices) line -gridTableSplitLine :: [Int] -> String -> [String] -gridTableSplitLine indices line = - map removeFinalBar $ tail $ splitByIndices (init indices) line - -removeFinalBar :: String -> String -removeFinalBar = reverse . dropWhile (=='|') . dropWhile (`elem` " \t") . - reverse - -removeOneLeadingSpace :: [String] -> [String] -removeOneLeadingSpace xs = - if all startsWithSpace xs - then map (drop 1) xs - else xs - where startsWithSpace "" = True - startsWithSpace (y:_) = y == ' ' - --- Calculate relative widths of table columns, based on indices -widthsFromIndices :: Int -- Number of columns on terminal - -> [Int] -- Indices - -> [Double] -- Fractional relative sizes of columns -widthsFromIndices _ [] = [] -widthsFromIndices numColumns indices = - let lengths' = zipWith (-) indices (0:indices) - lengths = reverse $ - case reverse lengths' of - [] -> [] - [x] -> [x] - -- compensate for the fact that intercolumn - -- spaces are counted in widths of all columns - -- but the last... - (x:y:zs) -> if x < y && y - x <= 2 - then y:y:zs - else x:y:zs - totLength = sum lengths - quotient = if totLength > numColumns - then fromIntegral totLength - else fromIntegral numColumns - fracs = map (\l -> (fromIntegral l) / quotient) lengths in - tail fracs - simpleTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([[Char]], [Alignment], [Int]) + -> GenParser Char ParserState ([[Block]], [Alignment], [Int]) simpleTableHeader headless = try $ do optional blanklines rawContent <- if headless @@ -722,64 +651,23 @@ simpleTableHeader headless = try $ do let rawHeads = if headless then replicate (length dashes) "" else simpleTableSplitLine indices rawContent - return (rawHeads, aligns, indices) + heads <- mapM (parseFromString (many plain)) $ + map removeLeadingTrailingSpace rawHeads + return (heads, aligns, indices) -gridTableHeader :: Bool -- ^ Headerless table - -> GenParser Char ParserState ([String], [Alignment], [Int]) -gridTableHeader headless = try $ do - optional blanklines - dashes <- gridDashedLines '-' - rawContent <- if headless - then return $ repeat "" - else many1 - (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline) - if headless - then return () - else gridTableSep '=' >> return () - let lines' = map snd dashes - let indices = scanl (+) 0 lines' - let aligns = replicate (length lines') AlignDefault -- RST does not have a notion of alignments - let rawHeads = if headless - then replicate (length dashes) "" - else map (intercalate " ") $ transpose - $ map (gridTableSplitLine indices) rawContent - return (rawHeads, aligns, indices) - --- Parse a table using 'headerParser', 'lineParser', and 'footerParser'. -tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int]) - -> ([Int] -> GenParser Char ParserState [[Block]]) - -> GenParser Char ParserState sep - -> GenParser Char ParserState end - -> GenParser Char ParserState Block -tableWith headerParser rowParser lineParser footerParser = try $ do - (rawHeads, aligns, indices) <- headerParser - lines' <- rowParser indices `sepEndBy` lineParser - footerParser - heads <- mapM (parseFromString (many plain)) rawHeads - state <- getState - let captions = [] -- no notion of captions in RST - let numColumns = stateColumns state - let widths = widthsFromIndices numColumns indices - return $ Table captions aligns widths heads lines' - --- Parse a simple table with '---' header and one line per row. +-- Parse a simple table. simpleTable :: Bool -- ^ Headerless table -> GenParser Char ParserState Block simpleTable headless = do - Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter + Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return []) -- Simple tables get 0s for relative column widths (i.e., use default) return $ Table c a (replicate (length a) 0) h l where sep = return () -- optional (simpleTableSep '-') --- Parse a grid table: starts with row of '-' on top, then header --- (which may be grid), then the rows, --- which may be grid, separated by blank lines, and --- ending with a footer (dashed line followed by blank line). gridTable :: Bool -- ^ Headerless table - -> GenParser Char ParserState Block -gridTable headless = - tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter + -> GenParser Char ParserState Block +gridTable = gridTableWith block (return []) table :: GenParser Char ParserState Block table = gridTable False <|> simpleTable False <|> @@ -889,17 +777,21 @@ explicitLink = try $ do referenceLink :: GenParser Char ParserState Inline referenceLink = try $ do label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_' - key <- option label' (do{char '_'; return [Str "_"]}) -- anonymous link state <- getState let keyTable = stateKeys state + let isAnonKey (Key [Str ('_':_)]) = True + isAnonKey _ = False + key <- option (Key label') $ + do char '_' + let anonKeys = sort $ filter isAnonKey $ M.keys keyTable + if null anonKeys + then pzero + else return (head anonKeys) (src,tit) <- case lookupKeySrc keyTable key of Nothing -> fail "no corresponding key" Just target -> return target - -- if anonymous link, remove first anon key so it won't be used again - let keyTable' = if (key == [Str "_"]) -- anonymous link? - then delete ([Str "_"], (src,tit)) keyTable -- remove first anon key - else keyTable - setState $ state { stateKeys = keyTable' } + -- if anonymous link, remove key so it won't be used again + when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable } return $ Link (normalizeSpaces label') (src, tit) autoURI :: GenParser Char ParserState Inline @@ -922,7 +814,7 @@ image = try $ do ref <- manyTill inline (char '|') state <- getState let keyTable = stateKeys state - (src,tit) <- case lookupKeySrc keyTable ref of + (src,tit) <- case lookupKeySrc keyTable (Key ref) of Nothing -> fail "no corresponding key" Just target -> return target return $ Image (normalizeSpaces ref) (src, tit) diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index 080354be1..40cf39987 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -28,208 +28,64 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of TeX math to a list of 'Pandoc' inline elements. -} module Text.Pandoc.Readers.TeXMath ( - readTeXMath + readTeXMath ) where import Text.ParserCombinators.Parsec import Text.Pandoc.Definition +import Text.TeXMath.Parser --- | Converts a string of raw TeX math to a list of 'Pandoc' inlines. +-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines. +-- Defaults to raw formula between @$@ characters if entire formula +-- can't be converted. readTeXMath :: String -- ^ String to parse (assumes @'\n'@ line endings) -> [Inline] -readTeXMath inp = case parse teXMath ("formula: " ++ inp) inp of - Left _ -> [Str inp] -- if unparseable, just include original - Right res -> res - -teXMath :: GenParser Char st [Inline] -teXMath = manyTill mathPart eof >>= return . concat - -mathPart :: GenParser Char st [Inline] -mathPart = whitespace <|> superscript <|> subscript <|> symbol <|> - argument <|> digits <|> letters <|> misc - -whitespace :: GenParser Char st [Inline] -whitespace = many1 space >> return [] - -symbol :: GenParser Char st [Inline] -symbol = try $ do - char '\\' - res <- many1 letter - case lookup res teXsymbols of - Just m -> return [Str m] - Nothing -> return [Str $ "\\" ++ res] - -argument :: GenParser Char st [Inline] -argument = try $ do - char '{' - res <- many mathPart - char '}' - return $ if null res - then [Str " "] - else [Str "{"] ++ concat res ++ [Str "}"] - -digits :: GenParser Char st [Inline] -digits = do - res <- many1 digit - return [Str res] - -letters :: GenParser Char st [Inline] -letters = do - res <- many1 letter - return [Emph [Str res]] - -misc :: GenParser Char st [Inline] -misc = do - res <- noneOf "}" - return [Str [res]] - -scriptArg :: GenParser Char st [Inline] -scriptArg = try $ do - (try (do{char '{'; r <- many mathPart; char '}'; return $ concat r})) - <|> symbol - <|> (do{c <- (letter <|> digit); return [Str [c]]}) - -superscript :: GenParser Char st [Inline] -superscript = try $ do - char '^' - arg <- scriptArg - return [Superscript arg] - -subscript :: GenParser Char st [Inline] -subscript = try $ do - char '_' - arg <- scriptArg - return [Subscript arg] - -withThinSpace :: String -> String -withThinSpace str = "\x2009" ++ str ++ "\x2009" - -teXsymbols :: [(String, String)] -teXsymbols = - [("alpha","\x3B1") - ,("beta", "\x3B2") - ,("chi", "\x3C7") - ,("delta", "\x3B4") - ,("Delta", "\x394") - ,("epsilon", "\x3B5") - ,("varepsilon", "\x25B") - ,("eta", "\x3B7") - ,("gamma", "\x3B3") - ,("Gamma", "\x393") - ,("iota", "\x3B9") - ,("kappa", "\x3BA") - ,("lambda", "\x3BB") - ,("Lambda", "\x39B") - ,("mu", "\x3BC") - ,("nu", "\x3BD") - ,("omega", "\x3C9") - ,("Omega", "\x3A9") - ,("phi", "\x3C6") - ,("varphi", "\x3D5") - ,("Phi", "\x3A6") - ,("pi", "\x3C0") - ,("Pi", "\x3A0") - ,("psi", "\x3C8") - ,("Psi", "\x3A8") - ,("rho", "\x3C1") - ,("sigma", "\x3C3") - ,("Sigma", "\x3A3") - ,("tau", "\x3C4") - ,("theta", "\x3B8") - ,("vartheta", "\x3D1") - ,("Theta", "\x398") - ,("upsilon", "\x3C5") - ,("xi", "\x3BE") - ,("Xi", "\x39E") - ,("zeta", "\x3B6") - ,("ne", "\x2260") - ,("lt", withThinSpace "<") - ,("le", withThinSpace "\x2264") - ,("leq", withThinSpace "\x2264") - ,("ge", withThinSpace "\x2265") - ,("geq", withThinSpace "\x2265") - ,("prec", withThinSpace "\x227A") - ,("succ", withThinSpace "\x227B") - ,("preceq", withThinSpace "\x2AAF") - ,("succeq", withThinSpace "\x2AB0") - ,("in", withThinSpace "\x2208") - ,("notin", withThinSpace "\x2209") - ,("subset", withThinSpace "\x2282") - ,("supset", withThinSpace "\x2283") - ,("subseteq", withThinSpace "\x2286") - ,("supseteq", withThinSpace "\x2287") - ,("equiv", withThinSpace "\x2261") - ,("cong", withThinSpace "\x2245") - ,("approx", withThinSpace "\x2248") - ,("propto", withThinSpace "\x221D") - ,("cdot", withThinSpace "\x22C5") - ,("star", withThinSpace "\x22C6") - ,("backslash", "\\") - ,("times", withThinSpace "\x00D7") - ,("divide", withThinSpace "\x00F7") - ,("circ", withThinSpace "\x2218") - ,("oplus", withThinSpace "\x2295") - ,("otimes", withThinSpace "\x2297") - ,("odot", withThinSpace "\x2299") - ,("sum", "\x2211") - ,("prod", "\x220F") - ,("wedge", withThinSpace "\x2227") - ,("bigwedge", withThinSpace "\x22C0") - ,("vee", withThinSpace "\x2228") - ,("bigvee", withThinSpace "\x22C1") - ,("cap", withThinSpace "\x2229") - ,("bigcap", withThinSpace "\x22C2") - ,("cup", withThinSpace "\x222A") - ,("bigcup", withThinSpace "\x22C3") - ,("neg", "\x00AC") - ,("implies", withThinSpace "\x21D2") - ,("iff", withThinSpace "\x21D4") - ,("forall", "\x2200") - ,("exists", "\x2203") - ,("bot", "\x22A5") - ,("top", "\x22A4") - ,("vdash", "\x22A2") - ,("models", withThinSpace "\x22A8") - ,("uparrow", "\x2191") - ,("downarrow", "\x2193") - ,("rightarrow", withThinSpace "\x2192") - ,("to", withThinSpace "\x2192") - ,("rightarrowtail", "\x21A3") - ,("twoheadrightarrow", withThinSpace "\x21A0") - ,("twoheadrightarrowtail", withThinSpace "\x2916") - ,("mapsto", withThinSpace "\x21A6") - ,("leftarrow", withThinSpace "\x2190") - ,("leftrightarrow", withThinSpace "\x2194") - ,("Rightarrow", withThinSpace "\x21D2") - ,("Leftarrow", withThinSpace "\x21D0") - ,("Leftrightarrow", withThinSpace "\x21D4") - ,("partial", "\x2202") - ,("nabla", "\x2207") - ,("pm", "\x00B1") - ,("emptyset", "\x2205") - ,("infty", "\x221E") - ,("aleph", "\x2135") - ,("ldots", "...") - ,("therefore", "\x2234") - ,("angle", "\x2220") - ,("quad", "\x00A0\x00A0") - ,("cdots", "\x22EF") - ,("vdots", "\x22EE") - ,("ddots", "\x22F1") - ,("diamond", "\x22C4") - ,("Box", "\x25A1") - ,("lfloor", "\x230A") - ,("rfloor", "\x230B") - ,("lceiling", "\x2308") - ,("rceiling", "\x2309") - ,("langle", "\x2329") - ,("rangle", "\x232A") - ,("int", "\8747") - ,("{", "{") - ,("}", "}") - ,("[", "[") - ,("]", "]") - ,("|", "|") - ,("||", "||") - ] +readTeXMath inp = case readTeXMath' inp of + Nothing -> [Str ("$" ++ inp ++ "$")] + Just res -> res + +-- | Like 'readTeXMath', but without the default. +readTeXMath' :: String -- ^ String to parse (assumes @'\n'@ line endings) + -> Maybe [Inline] +readTeXMath' inp = case parse formula "formula" inp of + Left _ -> Just [Str inp] + Right exps -> expsToInlines exps + +expsToInlines :: [Exp] -> Maybe [Inline] +expsToInlines xs = do + res <- mapM expToInlines xs + return (concat res) + +expToInlines :: Exp -> Maybe [Inline] +expToInlines (ENumber s) = Just [Str s] +expToInlines (EIdentifier s) = Just [Emph [Str s]] +expToInlines (EMathOperator s) = Just [Str s] +expToInlines (ESymbol t s) = Just $ addSpace t (Str s) + where addSpace Op x = [x, thinspace] + addSpace Bin x = [medspace, x, medspace] + addSpace Rel x = [widespace, x, widespace] + addSpace Pun x = [x, thinspace] + addSpace _ x = [x] + thinspace = Str "\x2006" + medspace = Str "\x2005" + widespace = Str "\x2004" +expToInlines (EStretchy x) = expToInlines x +expToInlines (EGrouped xs) = expsToInlines xs +expToInlines (ESpace _) = Just [Str " "] -- variable widths not supported +expToInlines (EBinary _ _ _) = Nothing +expToInlines (ESub x y) = do + x' <- expToInlines x + y' <- expToInlines y + return $ x' ++ [Subscript y'] +expToInlines (ESuper x y) = do + x' <- expToInlines x + y' <- expToInlines y + return $ x' ++ [Superscript y'] +expToInlines (ESubsup x y z) = do + x' <- expToInlines x + y' <- expToInlines y + z' <- expToInlines z + return $ x' ++ [Subscript y'] ++ [Superscript z'] +expToInlines (EText _ x) = Just [Emph [Str x]] +expToInlines _ = Nothing diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index d465142b3..025b54b93 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -45,54 +45,15 @@ module Text.Pandoc.Shared ( toRomanNumeral, escapeURI, unescapeURI, + tabFilter, + -- * Prettyprinting wrapped, wrapIfNeeded, wrappedTeX, wrapTeXIfNeeded, BlockWrapper (..), wrappedBlocksToDoc, - tabFilter, - -- * Parsing - (>>~), - anyLine, - many1Till, - notFollowedBy', - oneOfStrings, - spaceChar, - skipSpaces, - blankline, - blanklines, - enclosed, - stringAnyCase, - parseFromString, - lineClump, - charsInBalanced, - charsInBalanced', - romanNumeral, - emailAddress, - uri, - withHorizDisplacement, - nullBlock, - failIfStrict, - failUnlessLHS, - escaped, - anyOrderedListMarker, - orderedListMarker, - charRef, - readWith, - testStringWith, - ParserState (..), - defaultParserState, - HeaderType (..), - ParserContext (..), - QuoteContext (..), - NoteTable, - KeyTable, - lookupKeySrc, - refsMatch, - -- * Prettyprinting hang', - prettyPandoc, -- * Pandoc block and inline list processing orderedListMarkers, normalizeSpaces, @@ -101,6 +62,7 @@ module Text.Pandoc.Shared ( hierarchicalize, uniqueIdent, isHeaderBlock, + headerShift, -- * Writer options HTMLMathMethod (..), ObfuscationMethod (..), @@ -112,27 +74,18 @@ module Text.Pandoc.Shared ( ) where import Text.Pandoc.Definition -import Text.ParserCombinators.Parsec +import qualified Text.Pandoc.UTF8 as UTF8 (readFile) import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest ) import qualified Text.PrettyPrint.HughesPJ as PP -import Text.Pandoc.CharacterReferences ( characterReference ) -import Data.Char ( toLower, toUpper, ord, isLower, isUpper, isAlpha, isAscii, - isPunctuation ) +import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii, + isLetter, isDigit ) import Data.List ( find, isPrefixOf, intercalate ) -import Network.URI ( parseURI, URI (..), isAllowedInURI, escapeURIString, unEscapeString ) +import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString ) import Codec.Binary.UTF8.String ( encodeString, decodeString ) import System.Directory import System.FilePath ( (</>) ) --- Note: ghc >= 6.12 (base >=4.2) supports unicode through iconv --- So we use System.IO.UTF8 only if we have an earlier version -#if MIN_VERSION_base(4,2,0) -#else -import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents ) -import System.IO.UTF8 -#endif -import Data.Generics +import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S -import Control.Monad (join) import Paths_pandoc (getDataFileName) -- @@ -157,11 +110,11 @@ splitByIndices (x:xs) lst = -- | Replace each occurrence of one sublist in a list with another. substitute :: (Eq a) => [a] -> [a] -> [a] -> [a] substitute _ _ [] = [] -substitute [] _ lst = lst -substitute target replacement lst = +substitute [] _ xs = xs +substitute target replacement lst@(x:xs) = if target `isPrefixOf` lst - then replacement ++ (substitute target replacement $ drop (length target) lst) - else (head lst):(substitute target replacement $ tail lst) + then replacement ++ substitute target replacement (drop (length target) lst) + else x : substitute target replacement xs -- -- Text processing @@ -243,6 +196,30 @@ unescapeURI :: String -> String unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) . decodeString . unEscapeString +-- | Convert tabs to spaces and filter out DOS line endings. +-- Tabs will be preserved if tab stop is set to 0. +tabFilter :: Int -- ^ Tab stop + -> String -- ^ Input + -> String +tabFilter tabStop = + let go _ [] = "" + go _ ('\n':xs) = '\n' : go tabStop xs + go _ ('\r':'\n':xs) = '\n' : go tabStop xs + go _ ('\r':xs) = '\n' : go tabStop xs + go spsToNextStop ('\t':xs) = + if tabStop == 0 + then '\t' : go tabStop xs + else replicate spsToNextStop ' ' ++ go tabStop xs + go 1 (x:xs) = + x : go tabStop xs + go spsToNextStop (x:xs) = + x : go (spsToNextStop - 1) xs + in go tabStop + +-- +-- Prettyprinting +-- + -- | Wrap inlines to line length. wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>= @@ -312,560 +289,10 @@ wrappedBlocksToDoc = foldr addBlock empty addBlock (Pad d) accum = d $$ text "" $$ accum addBlock (Reg d) accum = d $$ accum --- | Convert tabs to spaces and filter out DOS line endings. --- Tabs will be preserved if tab stop is set to 0. -tabFilter :: Int -- ^ Tab stop - -> String -- ^ Input - -> String -tabFilter tabStop = - let go _ [] = "" - go _ ('\n':xs) = '\n' : go tabStop xs - go _ ('\r':'\n':xs) = '\n' : go tabStop xs - go _ ('\r':xs) = '\n' : go tabStop xs - go spsToNextStop ('\t':xs) = - if tabStop == 0 - then '\t' : go tabStop xs - else replicate spsToNextStop ' ' ++ go tabStop xs - go 1 (x:xs) = - x : go tabStop xs - go spsToNextStop (x:xs) = - x : go (spsToNextStop - 1) xs - in go tabStop - --- --- Parsing --- - --- | Like >>, but returns the operation on the left. --- (Suggested by Tillmann Rendel on Haskell-cafe list.) -(>>~) :: (Monad m) => m a -> m b -> m a -a >>~ b = a >>= \x -> b >> return x - --- | Parse any line of text -anyLine :: GenParser Char st [Char] -anyLine = manyTill anyChar newline - --- | Like @manyTill@, but reads at least one item. -many1Till :: GenParser tok st a - -> GenParser tok st end - -> GenParser tok st [a] -many1Till p end = do - first <- p - rest <- manyTill p end - return (first:rest) - --- | A more general form of @notFollowedBy@. This one allows any --- type of parser to be specified, and succeeds only if that parser fails. --- It does not consume any input. -notFollowedBy' :: Show b => GenParser a st b -> GenParser a st () -notFollowedBy' p = try $ join $ do a <- try p - return (unexpected (show a)) - <|> - return (return ()) --- (This version due to Andrew Pimlott on the Haskell mailing list.) - --- | Parses one of a list of strings (tried in order). -oneOfStrings :: [String] -> GenParser Char st String -oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings - --- | Parses a space or tab. -spaceChar :: CharParser st Char -spaceChar = char ' ' <|> char '\t' - --- | Skips zero or more spaces or tabs. -skipSpaces :: GenParser Char st () -skipSpaces = skipMany spaceChar - --- | Skips zero or more spaces or tabs, then reads a newline. -blankline :: GenParser Char st Char -blankline = try $ skipSpaces >> newline - --- | Parses one or more blank lines and returns a string of newlines. -blanklines :: GenParser Char st [Char] -blanklines = many1 blankline - --- | Parses material enclosed between start and end parsers. -enclosed :: GenParser Char st t -- ^ start parser - -> GenParser Char st end -- ^ end parser - -> GenParser Char st a -- ^ content parser (to be used repeatedly) - -> GenParser Char st [a] -enclosed start end parser = try $ - start >> notFollowedBy space >> many1Till parser end - --- | Parse string, case insensitive. -stringAnyCase :: [Char] -> CharParser st String -stringAnyCase [] = string "" -stringAnyCase (x:xs) = do - firstChar <- char (toUpper x) <|> char (toLower x) - rest <- stringAnyCase xs - return (firstChar:rest) - --- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a -parseFromString parser str = do - oldPos <- getPosition - oldInput <- getInput - setInput str - result <- parser - setInput oldInput - setPosition oldPos - return result - --- | Parse raw line block up to and including blank lines. -lineClump :: GenParser Char st String -lineClump = blanklines - <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines) - --- | Parse a string of characters between an open character --- and a close character, including text between balanced --- pairs of open and close, which must be different. For example, --- @charsInBalanced '(' ')'@ will parse "(hello (there))" --- and return "hello (there)". Stop if a blank line is --- encountered. -charsInBalanced :: Char -> Char -> GenParser Char st String -charsInBalanced open close = try $ do - char open - raw <- many $ (many1 (noneOf [open, close, '\n'])) - <|> (do res <- charsInBalanced open close - return $ [open] ++ res ++ [close]) - <|> try (string "\n" >>~ notFollowedBy' blanklines) - char close - return $ concat raw - --- | Like @charsInBalanced@, but allow blank lines in the content. -charsInBalanced' :: Char -> Char -> GenParser Char st String -charsInBalanced' open close = try $ do - char open - raw <- many $ (many1 (noneOf [open, close])) - <|> (do res <- charsInBalanced' open close - return $ [open] ++ res ++ [close]) - char close - return $ concat raw - --- Auxiliary functions for romanNumeral: - -lowercaseRomanDigits :: [Char] -lowercaseRomanDigits = ['i','v','x','l','c','d','m'] - -uppercaseRomanDigits :: [Char] -uppercaseRomanDigits = map toUpper lowercaseRomanDigits - --- | Parses a roman numeral (uppercase or lowercase), returns number. -romanNumeral :: Bool -- ^ Uppercase if true - -> GenParser Char st Int -romanNumeral upperCase = do - let romanDigits = if upperCase - then uppercaseRomanDigits - else lowercaseRomanDigits - lookAhead $ oneOf romanDigits - let [one, five, ten, fifty, hundred, fivehundred, thousand] = - map char romanDigits - thousands <- many thousand >>= (return . (1000 *) . length) - ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900 - fivehundreds <- many fivehundred >>= (return . (500 *) . length) - fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400 - hundreds <- many hundred >>= (return . (100 *) . length) - nineties <- option 0 $ try $ ten >> hundred >> return 90 - fifties <- many fifty >>= (return . (50 *) . length) - forties <- option 0 $ try $ ten >> fifty >> return 40 - tens <- many ten >>= (return . (10 *) . length) - nines <- option 0 $ try $ one >> ten >> return 9 - fives <- many five >>= (return . (5 *) . length) - fours <- option 0 $ try $ one >> five >> return 4 - ones <- many one >>= (return . length) - let total = thousands + ninehundreds + fivehundreds + fourhundreds + - hundreds + nineties + fifties + forties + tens + nines + - fives + fours + ones - if total == 0 - then fail "not a roman numeral" - else return total - --- Parsers for email addresses and URIs - -emailChar :: GenParser Char st Char -emailChar = alphaNum <|> oneOf "-+_." - -domainChar :: GenParser Char st Char -domainChar = alphaNum <|> char '-' - -domain :: GenParser Char st [Char] -domain = do - first <- many1 domainChar - dom <- many1 $ try (char '.' >> many1 domainChar ) - return $ intercalate "." (first:dom) - --- | Parses an email address; returns original and corresponding --- escaped mailto: URI. -emailAddress :: GenParser Char st (String, String) -emailAddress = try $ do - firstLetter <- alphaNum - restAddr <- many emailChar - let addr = firstLetter:restAddr - char '@' - dom <- domain - let full = addr ++ '@':dom - return (full, escapeURI $ "mailto:" ++ full) - --- | Parses a URI. Returns pair of original and URI-escaped version. -uri :: GenParser Char st (String, String) -uri = try $ do - let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:", - "news:", "telnet:" ] - lookAhead $ oneOfStrings protocols - -- scan non-ascii characters and ascii characters allowed in a URI - str <- many1 $ satisfy (\c -> not (isAscii c) || isAllowedInURI c) - -- now see if they amount to an absolute URI - case parseURI (escapeURI str) of - Just uri' -> if uriScheme uri' `elem` protocols - then return (str, show uri') - else fail "not a URI" - Nothing -> fail "not a URI" - --- | Applies a parser, returns tuple of its results and its horizontal --- displacement (the difference between the source column at the end --- and the source column at the beginning). Vertical displacement --- (source row) is ignored. -withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply - -> GenParser Char st (a, Int) -- ^ (result, displacement) -withHorizDisplacement parser = do - pos1 <- getPosition - result <- parser - pos2 <- getPosition - return (result, sourceColumn pos2 - sourceColumn pos1) - --- | Parses a character and returns 'Null' (so that the parser can move on --- if it gets stuck). -nullBlock :: GenParser Char st Block -nullBlock = anyChar >> return Null - --- | Fail if reader is in strict markdown syntax mode. -failIfStrict :: GenParser Char ParserState () -failIfStrict = do - state <- getState - if stateStrict state then fail "strict mode" else return () - --- | Fail unless we're in literate haskell mode. -failUnlessLHS :: GenParser tok ParserState () -failUnlessLHS = do - state <- getState - if stateLiterateHaskell state then return () else fail "Literate haskell feature" - --- | Parses backslash, then applies character parser. -escaped :: GenParser Char st Char -- ^ Parser for character to escape - -> GenParser Char st Inline -escaped parser = try $ do - char '\\' - result <- parser - return (Str [result]) - --- | Parses an uppercase roman numeral and returns (UpperRoman, number). -upperRoman :: GenParser Char st (ListNumberStyle, Int) -upperRoman = do - num <- romanNumeral True - return (UpperRoman, num) - --- | Parses a lowercase roman numeral and returns (LowerRoman, number). -lowerRoman :: GenParser Char st (ListNumberStyle, Int) -lowerRoman = do - num <- romanNumeral False - return (LowerRoman, num) - --- | Parses a decimal numeral and returns (Decimal, number). -decimal :: GenParser Char st (ListNumberStyle, Int) -decimal = do - num <- many1 digit - return (Decimal, read num) - --- | Parses a '@' and optional label and --- returns (DefaultStyle, [next example number]). The next --- example number is incremented in parser state, and the label --- (if present) is added to the label table. -exampleNum :: GenParser Char ParserState (ListNumberStyle, Int) -exampleNum = do - char '@' - lab <- many (alphaNum <|> oneOf "_-") - st <- getState - let num = stateNextExample st - let newlabels = if null lab - then stateExamples st - else M.insert lab num $ stateExamples st - updateState $ \s -> s{ stateNextExample = num + 1 - , stateExamples = newlabels } - return (Example, num) - --- | Parses a '#' returns (DefaultStyle, 1). -defaultNum :: GenParser Char st (ListNumberStyle, Int) -defaultNum = do - char '#' - return (DefaultStyle, 1) - --- | Parses a lowercase letter and returns (LowerAlpha, number). -lowerAlpha :: GenParser Char st (ListNumberStyle, Int) -lowerAlpha = do - ch <- oneOf ['a'..'z'] - return (LowerAlpha, ord ch - ord 'a' + 1) - --- | Parses an uppercase letter and returns (UpperAlpha, number). -upperAlpha :: GenParser Char st (ListNumberStyle, Int) -upperAlpha = do - ch <- oneOf ['A'..'Z'] - return (UpperAlpha, ord ch - ord 'A' + 1) - --- | Parses a roman numeral i or I -romanOne :: GenParser Char st (ListNumberStyle, Int) -romanOne = (char 'i' >> return (LowerRoman, 1)) <|> - (char 'I' >> return (UpperRoman, 1)) - --- | Parses an ordered list marker and returns list attributes. -anyOrderedListMarker :: GenParser Char ParserState ListAttributes -anyOrderedListMarker = choice $ - [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], - numParser <- [decimal, exampleNum, defaultNum, romanOne, - lowerAlpha, lowerRoman, upperAlpha, upperRoman]] - --- | Parses a list number (num) followed by a period, returns list attributes. -inPeriod :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes -inPeriod num = try $ do - (style, start) <- num - char '.' - let delim = if style == DefaultStyle - then DefaultDelim - else Period - return (start, style, delim) - --- | Parses a list number (num) followed by a paren, returns list attributes. -inOneParen :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes -inOneParen num = try $ do - (style, start) <- num - char ')' - return (start, style, OneParen) - --- | Parses a list number (num) enclosed in parens, returns list attributes. -inTwoParens :: GenParser Char st (ListNumberStyle, Int) - -> GenParser Char st ListAttributes -inTwoParens num = try $ do - char '(' - (style, start) <- num - char ')' - return (start, style, TwoParens) - --- | Parses an ordered list marker with a given style and delimiter, --- returns number. -orderedListMarker :: ListNumberStyle - -> ListNumberDelim - -> GenParser Char ParserState Int -orderedListMarker style delim = do - let num = defaultNum <|> -- # can continue any kind of list - case style of - DefaultStyle -> decimal - Example -> exampleNum - Decimal -> decimal - UpperRoman -> upperRoman - LowerRoman -> lowerRoman - UpperAlpha -> upperAlpha - LowerAlpha -> lowerAlpha - let context = case delim of - DefaultDelim -> inPeriod - Period -> inPeriod - OneParen -> inOneParen - TwoParens -> inTwoParens - (start, _, _) <- context num - return start - --- | Parses a character reference and returns a Str element. -charRef :: GenParser Char st Inline -charRef = do - c <- characterReference - return $ Str [c] - --- | Parse a string with a given parser and state. -readWith :: GenParser Char ParserState a -- ^ parser - -> ParserState -- ^ initial state - -> String -- ^ input string - -> a -readWith parser state input = - case runParser parser state "source" input of - Left err -> error $ "\nError:\n" ++ show err - Right result -> result - --- | Parse a string with @parser@ (for testing). -testStringWith :: (Show a) => GenParser Char ParserState a - -> String - -> IO () -testStringWith parser str = putStrLn $ show $ - readWith parser defaultParserState str - --- | Parsing options. -data ParserState = ParserState - { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? - stateParserContext :: ParserContext, -- ^ Inside list? - stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? - stateSanitizeHTML :: Bool, -- ^ Sanitize HTML? - stateKeys :: KeyTable, -- ^ List of reference keys -#ifdef _CITEPROC - stateCitations :: [String], -- ^ List of available citations -#endif - stateNotes :: NoteTable, -- ^ List of notes - stateTabStop :: Int, -- ^ Tab stop - stateStandalone :: Bool, -- ^ Parse bibliographic info? - stateTitle :: [Inline], -- ^ Title of document - stateAuthors :: [[Inline]], -- ^ Authors of document - stateDate :: [Inline], -- ^ Date of document - stateStrict :: Bool, -- ^ Use strict markdown syntax? - stateSmart :: Bool, -- ^ Use smart typography? - stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell - stateColumns :: Int, -- ^ Number of columns in terminal - stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used - stateIndentedCodeClasses :: [String], -- ^ Classes to use for indented code blocks - stateNextExample :: Int, -- ^ Number of next example - stateExamples :: M.Map String Int -- ^ Map from example labels to numbers - } - deriving Show - -defaultParserState :: ParserState -defaultParserState = - ParserState { stateParseRaw = False, - stateParserContext = NullState, - stateQuoteContext = NoQuote, - stateSanitizeHTML = False, - stateKeys = [], -#ifdef _CITEPROC - stateCitations = [], -#endif - stateNotes = [], - stateTabStop = 4, - stateStandalone = False, - stateTitle = [], - stateAuthors = [], - stateDate = [], - stateStrict = False, - stateSmart = False, - stateLiterateHaskell = False, - stateColumns = 80, - stateHeaderTable = [], - stateIndentedCodeClasses = [], - stateNextExample = 1, - stateExamples = M.empty } - -data HeaderType - = SingleHeader Char -- ^ Single line of characters underneath - | DoubleHeader Char -- ^ Lines of characters above and below - deriving (Eq, Show) - -data ParserContext - = ListItemState -- ^ Used when running parser on list item contents - | NullState -- ^ Default state - deriving (Eq, Show) - -data QuoteContext - = InSingleQuote -- ^ Used when parsing inside single quotes - | InDoubleQuote -- ^ Used when parsing inside double quotes - | NoQuote -- ^ Used when not parsing inside quotes - deriving (Eq, Show) - -type NoteTable = [(String, String)] - -type KeyTable = [([Inline], Target)] - --- | Look up key in key table and return target object. -lookupKeySrc :: KeyTable -- ^ Key table - -> [Inline] -- ^ Key - -> Maybe Target -lookupKeySrc table key = case find (refsMatch key . fst) table of - Nothing -> Nothing - Just (_, src) -> Just src - --- | Returns @True@ if keys match (case insensitive). -refsMatch :: [Inline] -> [Inline] -> Bool -refsMatch ((Str x):restx) ((Str y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((Emph x):restx) ((Emph y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Strong x):restx) ((Strong y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Strikeout x):restx) ((Strikeout y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Superscript x):restx) ((Superscript y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Subscript x):restx) ((Subscript y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((SmallCaps x):restx) ((SmallCaps y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Quoted t x):restx) ((Quoted u y):resty) = - t == u && refsMatch x y && refsMatch restx resty -refsMatch ((Code x):restx) ((Code y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((Math t x):restx) ((Math u y):resty) = - ((map toLower x) == (map toLower y)) && t == u && refsMatch restx resty -refsMatch ((TeX x):restx) ((TeX y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty -refsMatch [] x = null x -refsMatch x [] = null x - --- --- Prettyprinting --- - -- | A version of hang that works like the version in pretty-1.0.0.0 hang' :: Doc -> Int -> Doc -> Doc hang' d1 n d2 = d1 $$ (nest n d2) --- | Indent string as a block. -indentBy :: Int -- ^ Number of spaces to indent the block - -> Int -- ^ Number of spaces (rel to block) to indent first line - -> String -- ^ Contents of block to indent - -> String -indentBy _ _ [] = "" -indentBy num first str = - let (firstLine:restLines) = lines str - firstLineIndent = num + first - in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ - (intercalate "\n" $ map ((replicate num ' ') ++ ) restLines) - --- | Prettyprint list of Pandoc blocks elements. -prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks - -> [Block] -- ^ List of blocks - -> String -prettyBlockList indent [] = indentBy indent 0 "[]" -prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ - (intercalate "\n, " (map prettyBlock blocks)) ++ " ]" - --- | Prettyprint Pandoc block element. -prettyBlock :: Block -> String -prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ - (prettyBlockList 2 blocks) -prettyBlock (OrderedList attribs blockLists) = - "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++ - (intercalate ", " $ map (\blocks -> prettyBlockList 2 blocks) - blockLists)) ++ " ]" -prettyBlock (BulletList blockLists) = "BulletList\n" ++ - indentBy 2 0 ("[ " ++ (intercalate ", " - (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" -prettyBlock (DefinitionList items) = "DefinitionList\n" ++ - indentBy 2 0 ("[ " ++ (intercalate "\n, " - (map (\(term, defs) -> "(" ++ show term ++ ",\n" ++ - indentBy 3 0 ("[ " ++ (intercalate ", " - (map (\blocks -> prettyBlockList 2 blocks) defs)) ++ "]") ++ - ")") items))) ++ " ]" -prettyBlock (Table caption aligns widths header rows) = - "Table " ++ show caption ++ " " ++ show aligns ++ " " ++ - show widths ++ "\n" ++ prettyRow header ++ " [\n" ++ - (intercalate ",\n" (map prettyRow rows)) ++ " ]" - where prettyRow cols = indentBy 2 0 ("[ " ++ (intercalate ", " - (map (\blocks -> prettyBlockList 2 blocks) - cols))) ++ " ]" -prettyBlock block = show block - --- | Prettyprint Pandoc document. -prettyPandoc :: Pandoc -> String -prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++ - ")\n" ++ (prettyBlockList 0 blocks) ++ "\n" - -- -- Pandoc block and inline list processing -- @@ -877,7 +304,7 @@ orderedListMarkers (start, numstyle, numdelim) = let singleton c = [c] nums = case numstyle of DefaultStyle -> map show [start..] - Example -> map show [start..] + Example -> map show [start..] Decimal -> map show [start..] UpperAlpha -> drop (start - 1) $ cycle $ map singleton ['A'..'Z'] @@ -937,38 +364,37 @@ data Element = Blk Block -- lvl num ident label contents deriving (Eq, Read, Show, Typeable, Data) --- | Convert Pandoc inline list to plain text identifier. +-- | Convert Pandoc inline list to plain text identifier. HTML +-- identifiers must start with a letter, and may contain only +-- letters, digits, and the characters _-. inlineListToIdentifier :: [Inline] -> String -inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier' - -inlineListToIdentifier' :: [Inline] -> [Char] -inlineListToIdentifier' [] = "" -inlineListToIdentifier' (x:xs) = - xAsText ++ inlineListToIdentifier' xs - where xAsText = case x of - Str s -> filter (\c -> c `elem` "_-." || not (isPunctuation c)) $ - intercalate "-" $ words $ map toLower s - Emph lst -> inlineListToIdentifier' lst - Strikeout lst -> inlineListToIdentifier' lst - Superscript lst -> inlineListToIdentifier' lst - SmallCaps lst -> inlineListToIdentifier' lst - Subscript lst -> inlineListToIdentifier' lst - Strong lst -> inlineListToIdentifier' lst - Quoted _ lst -> inlineListToIdentifier' lst - Cite _ lst -> inlineListToIdentifier' lst - Code s -> s - Space -> "-" - EmDash -> "-" - EnDash -> "-" - Apostrophe -> "" - Ellipses -> "" - LineBreak -> "-" - Math _ _ -> "" - TeX _ -> "" - HtmlInline _ -> "" - Link lst _ -> inlineListToIdentifier' lst - Image lst _ -> inlineListToIdentifier' lst - Note _ -> "" +inlineListToIdentifier = + dropWhile (not . isAlpha) . intercalate "-" . words . map toLower . + filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") . + concatMap extractText + where extractText x = case x of + Str s -> s + Emph lst -> concatMap extractText lst + Strikeout lst -> concatMap extractText lst + Superscript lst -> concatMap extractText lst + SmallCaps lst -> concatMap extractText lst + Subscript lst -> concatMap extractText lst + Strong lst -> concatMap extractText lst + Quoted _ lst -> concatMap extractText lst + Cite _ lst -> concatMap extractText lst + Code s -> s + Space -> " " + EmDash -> "---" + EnDash -> "--" + Apostrophe -> "" + Ellipses -> "..." + LineBreak -> " " + Math _ s -> s + TeX _ -> "" + HtmlInline _ -> "" + Link lst _ -> concatMap extractText lst + Image lst _ -> concatMap extractText lst + Note _ -> "" -- | Convert list of Pandoc blocks into (hierarchical) list of Elements hierarchicalize :: [Block] -> [Element] @@ -1000,7 +426,9 @@ headerLtEq _ _ = False -- Second argument is a list of already used identifiers. uniqueIdent :: [Inline] -> [String] -> String uniqueIdent title' usedIdents = - let baseIdent = inlineListToIdentifier title' + let baseIdent = case inlineListToIdentifier title' of + "" -> "section" + x -> x numIdent n = baseIdent ++ "-" ++ show n in if baseIdent `elem` usedIdents then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of @@ -1013,6 +441,13 @@ isHeaderBlock :: Block -> Bool isHeaderBlock (Header _ _) = True isHeaderBlock _ = False +-- | Shift header levels up or down. +headerShift :: Int -> Pandoc -> Pandoc +headerShift n = processWith shift + where shift :: Block -> Block + shift (Header level inner) = Header (level + n) inner + shift x = x + -- -- Writer options -- @@ -1036,8 +471,7 @@ data WriterOptions = WriterOptions { writerStandalone :: Bool -- ^ Include header and footer , writerTemplate :: String -- ^ Template to use in standalone mode , writerVariables :: [(String, String)] -- ^ Variables to set in template - , writerIncludeBefore :: String -- ^ Text to include before the body - , writerIncludeAfter :: String -- ^ Text to include after the body + , writerEPUBMetadata :: String -- ^ Metadata to include in EPUB , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs , writerTableOfContents :: Bool -- ^ Include table of contents , writerS5 :: Bool -- ^ We're writing S5 @@ -1052,6 +486,8 @@ data WriterOptions = WriterOptions , writerLiterateHaskell :: Bool -- ^ Write as literate haskell , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML + , writerSourceDirectory :: FilePath -- ^ Directory path of 1st source file + , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory } deriving Show -- | Default writer options. @@ -1060,8 +496,7 @@ defaultWriterOptions = WriterOptions { writerStandalone = False , writerTemplate = "" , writerVariables = [] - , writerIncludeBefore = "" - , writerIncludeAfter = "" + , writerEPUBMetadata = "" , writerTabStop = 4 , writerTableOfContents = False , writerS5 = False @@ -1076,6 +511,8 @@ defaultWriterOptions = , writerLiterateHaskell = False , writerEmailObfuscation = JavascriptObfuscation , writerIdentifierPrefix = "" + , writerSourceDirectory = "." + , writerUserDataDir = Nothing } -- @@ -1096,6 +533,6 @@ inDirectory path action = do readDataFile :: Maybe FilePath -> FilePath -> IO String readDataFile userDir fname = case userDir of - Nothing -> getDataFileName fname >>= readFile - Just u -> catch (readFile $ u </> fname) - (\_ -> getDataFileName fname >>= readFile) + Nothing -> getDataFileName fname >>= UTF8.readFile + Just u -> catch (UTF8.readFile $ u </> fname) + (\_ -> getDataFileName fname >>= UTF8.readFile) diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 2238f4da8..372954ae3 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -173,7 +173,7 @@ for = try $ do string "$for(" id' <- ident string ")$" - -- if newline after the "if", then a newline after "endif" will be swallowed + -- if newline after the "for", then a newline after "endfor" will be swallowed multiline <- option False $ try $ skipEndline >> return True let matches = filter (\(k,_) -> k == id') vars let indent = replicate pos ' ' diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs new file mode 100644 index 000000000..3dd61176c --- /dev/null +++ b/src/Text/Pandoc/UTF8.hs @@ -0,0 +1,72 @@ +{- +Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.UTF8 + Copyright : Copyright (C) 2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +UTF-8 aware string IO functions that will work with GHC 6.10 or 6.12. +-} +module Text.Pandoc.UTF8 ( readFile + , writeFile + , getContents + , putStr + , putStrLn + , hPutStr + , hPutStrLn + ) + +where +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Lazy.UTF8 (toString, fromString) +import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn) +import System.IO (Handle) +import Control.Monad (liftM) + +bom :: B.ByteString +bom = B.pack [0xEF, 0xBB, 0xBF] + +stripBOM :: B.ByteString -> B.ByteString +stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s +stripBOM s = s + +readFile :: FilePath -> IO String +readFile = liftM (toString . stripBOM) . B.readFile + +writeFile :: FilePath -> String -> IO () +writeFile f = B.writeFile f . fromString + +getContents :: IO String +getContents = liftM (toString . stripBOM) B.getContents + +putStr :: String -> IO () +putStr = B.putStr . fromString + +putStrLn :: String -> IO () +putStrLn = B.putStrLn . fromString + +hPutStr :: Handle -> String -> IO () +hPutStr h = B.hPutStr h . fromString + +hPutStrLn :: Handle -> String -> IO () +hPutStrLn h s = hPutStr h (s ++ "\n") diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs new file mode 100644 index 000000000..082644eea --- /dev/null +++ b/src/Text/Pandoc/UUID.hs @@ -0,0 +1,77 @@ +{- +Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.UUID + Copyright : Copyright (C) 2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +UUID generation using Version 4 (random method) described +in RFC4122. See http://tools.ietf.org/html/rfc4122 +-} + +module Text.Pandoc.UUID ( UUID, getRandomUUID ) where + +import Text.Printf ( printf ) +import System.Random ( randomIO ) +import Data.Word +import Data.Bits ( setBit, clearBit ) +import Control.Monad ( liftM ) + +data UUID = UUID Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 + Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8 + +instance Show UUID where + show (UUID a b c d e f g h i j k l m n o p) = + "urn:uuid:" ++ + printf "%02x" a ++ + printf "%02x" b ++ + printf "%02x" c ++ + printf "%02x" d ++ + "-" ++ + printf "%02x" e ++ + printf "%02x" f ++ + "-" ++ + printf "%02x" g ++ + printf "%02x" h ++ + "-" ++ + printf "%02x" i ++ + printf "%02x" j ++ + "-" ++ + printf "%02x" k ++ + printf "%02x" l ++ + printf "%02x" m ++ + printf "%02x" n ++ + printf "%02x" o ++ + printf "%02x" p + +getRandomUUID :: IO UUID +getRandomUUID = do + let getRN :: a -> IO Word8 + getRN _ = liftM fromIntegral (randomIO :: IO Int) + [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] <- mapM getRN ([1..16] :: [Int]) + -- set variant + let i' = i `setBit` 7 `clearBit` 6 + -- set version (0100 for random) + let g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4 + return $ UUID a b c d e f g' h i' j k l m n o p + diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs new file mode 100644 index 000000000..deaa2fe33 --- /dev/null +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -0,0 +1,283 @@ +{- +Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.EPUB + Copyright : Copyright (C) 2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to EPUB. +-} +module Text.Pandoc.Writers.EPUB ( writeEPUB ) where +import Data.IORef +import Data.Maybe ( fromMaybe, isNothing ) +import Data.List ( findIndices, isPrefixOf ) +import System.Environment ( getEnv ) +import System.FilePath ( (</>), takeBaseName, takeExtension ) +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Lazy.UTF8 ( fromString ) +import Codec.Archive.Zip +import System.Time +import Text.Pandoc.Shared hiding ( Element ) +import Text.Pandoc.Definition +import Control.Monad (liftM) +import Text.XML.Light hiding (ppTopElement) +import Text.Pandoc.UUID +import Text.Pandoc.Writers.HTML +import Text.Pandoc.Writers.Markdown ( writePlain ) +import Data.Char ( toLower ) + +-- | Produce an EPUB file from a Pandoc document. +writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line + -> WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> IO B.ByteString +writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do + (TOD epochtime _) <- getClockTime + let mkEntry path content = toEntry path epochtime content + let opts' = opts{ writerEmailObfuscation = NoObfuscation + , writerStandalone = True + , writerWrapText = False } + let sourceDir = writerSourceDirectory opts' + + -- title page + let vars = writerVariables opts' + let tpContent = fromString $ writeHtmlString + opts'{writerTemplate = pageTemplate + ,writerVariables = ("titlepage","yes"):vars} + (Pandoc meta []) + let tpEntry = mkEntry "title_page.xhtml" tpContent + + -- handle pictures + picsRef <- newIORef [] + Pandoc _ blocks <- liftM (processWith transformBlock) $ processWithM + (transformInlines (writerHTMLMathMethod opts) sourceDir picsRef) doc + pics <- readIORef picsRef + let readPicEntry (oldsrc, newsrc) = readEntry [] oldsrc >>= \e -> + return e{ eRelativePath = newsrc } + picEntries <- mapM readPicEntry pics + + -- body pages + let isH1 (Header 1 _) = True + isH1 _ = False + let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks + let chunks = splitByIndices h1Indices blocks + let titleize (Header 1 xs : ys) = Pandoc meta{docTitle = xs} ys + titleize xs = Pandoc meta xs + let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate + , writerHTMLMathMethod = PlainMath } + let chapters = map titleize chunks + let chapterToEntry :: Int -> Pandoc -> Entry + chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $ + fromString $ chapToHtml chap + let chapterEntries = zipWith chapterToEntry [1..] chapters + + -- contents.opf + lang <- catch (liftM (takeWhile (/='.')) $ getEnv "lang") + (\_ -> return "en-US") + uuid <- getRandomUUID + let chapterNode ent = unode "item" ! + [("id", takeBaseName $ eRelativePath ent), + ("href", eRelativePath ent), + ("media-type", "application/xhtml+xml")] $ () + let chapterRefNode ent = unode "itemref" ! + [("idref", takeBaseName $ eRelativePath ent)] $ () + let pictureNode ent = unode "item" ! + [("id", takeBaseName $ eRelativePath ent), + ("href", eRelativePath ent), + ("media-type", fromMaybe "application/octet-stream" + $ imageTypeOf $ eRelativePath ent)] $ () + let plainify t = removeTrailingSpace $ + writePlain opts'{ writerStandalone = False } $ + Pandoc meta [Plain t] + let plainTitle = plainify $ docTitle meta + let plainAuthors = map plainify $ docAuthors meta + let contentsData = fromString $ ppTopElement $ + unode "package" ! [("version","2.0") + ,("xmlns","http://www.idpf.org/2007/opf") + ,("unique-identifier","BookId")] $ + [ metadataElement (writerEPUBMetadata opts') + uuid lang plainTitle plainAuthors + , unode "manifest" $ + [ unode "item" ! [("id","ncx"), ("href","toc.ncx") + ,("media-type","application/x-dtbncx+xml")] $ () + , unode "item" ! [("id","style"), ("href","stylesheet.css") + ,("media-type","text/css")] $ () + ] ++ + map chapterNode (tpEntry : chapterEntries) ++ + map pictureNode picEntries + , unode "spine" ! [("toc","ncx")] $ + map chapterRefNode (tpEntry : chapterEntries) + ] + let contentsEntry = mkEntry "content.opf" contentsData + + -- toc.ncx + let navPointNode ent n tit = unode "navPoint" ! + [("id", "navPoint-" ++ show n) + ,("playOrder", show n)] $ + [ unode "navLabel" $ unode "text" tit + , unode "content" ! [("src", + eRelativePath ent)] $ () + ] + let tocData = fromString $ ppTopElement $ + unode "ncx" ! [("version","2005-1") + ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ + [ unode "head" + [ unode "meta" ! [("name","dtb:uid") + ,("content", show uuid)] $ () + , unode "meta" ! [("name","dtb:depth") + ,("content", "1")] $ () + , unode "meta" ! [("name","dtb:totalPageCount") + ,("content", "0")] $ () + , unode "meta" ! [("name","dtb:maxPageNumber") + ,("content", "0")] $ () + ] + , unode "docTitle" $ unode "text" $ plainTitle + , unode "navMap" $ zipWith3 navPointNode (tpEntry : chapterEntries) + [1..(length chapterEntries + 1)] + ("Title Page" : map (\(Pandoc m _) -> + plainify $ docTitle m) chapters) + ] + let tocEntry = mkEntry "toc.ncx" tocData + + -- mimetype + let mimetypeEntry = mkEntry "mimetype" $ fromString "application/epub+zip" + + -- container.xml + let containerData = fromString $ ppTopElement $ + unode "container" ! [("version","1.0") + ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ + unode "rootfiles" $ + unode "rootfile" ! [("full-path","content.opf") + ,("media-type","application/oebps-package+xml")] $ () + let containerEntry = mkEntry "META-INF/container.xml" containerData + + -- stylesheet + stylesheet <- case mbStylesheet of + Just s -> return s + Nothing -> readDataFile (writerUserDataDir opts) "epub.css" + let stylesheetEntry = mkEntry "stylesheet.css" $ fromString stylesheet + + -- construct archive + let archive = foldr addEntryToArchive emptyArchive + (mimetypeEntry : containerEntry : stylesheetEntry : tpEntry : + contentsEntry : tocEntry : (picEntries ++ chapterEntries) ) + return $ fromArchive archive + +metadataElement :: String -> UUID -> String -> String -> [String] -> Element +metadataElement metadataXML uuid lang title authors = + let userNodes = parseXML metadataXML + elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") + ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ + filter isDublinCoreElement $ onlyElems userNodes + dublinElements = ["contributor","coverage","creator","date", + "description","format","identifier","language","publisher", + "relation","rights","source","subject","title","type"] + isDublinCoreElement e = qPrefix (elName e) == Just "dc" && + qName (elName e) `elem` dublinElements + contains e n = not (null (findElements (QName n Nothing (Just "dc")) e)) + newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++ + [ unode "dc:language" lang | not (elt `contains` "language") ] ++ + [ unode "dc:identifier" ! [("id","BookId")] $ show uuid | + not (elt `contains` "identifier") ] ++ + [ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ] + in elt{ elContent = elContent elt ++ map Elem newNodes } + +transformInlines :: HTMLMathMethod + -> FilePath + -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images + -> [Inline] + -> IO [Inline] +transformInlines _ _ _ (Image lab (src,_) : xs) | isNothing (imageTypeOf src) = + return $ Emph lab : xs +transformInlines _ sourceDir picsRef (Image lab (src,tit) : xs) = do + pics <- readIORef picsRef + let oldsrc = sourceDir </> src + let ext = takeExtension src + newsrc <- case lookup oldsrc pics of + Just n -> return n + Nothing -> do + let new = "images/img" ++ show (length pics) ++ ext + modifyIORef picsRef ( (oldsrc, new): ) + return new + return $ Image lab (newsrc, tit) : xs +transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do + let writeHtmlInline opts z = removeTrailingSpace $ + writeHtmlString opts $ Pandoc (Meta [] [] []) [Plain [z]] + mathml = writeHtmlInline defaultWriterOptions{ + writerHTMLMathMethod = MathML Nothing } x + fallback = writeHtmlInline defaultWriterOptions{ + writerHTMLMathMethod = PlainMath } x + inOps = "<ops:switch xmlns:ops=\"http://www.idpf.org/2007/ops\">" ++ + "<ops:case required-namespace=\"http://www.w3.org/1998/Math/MathML\">" ++ + mathml ++ "</ops:case><ops:default>" ++ fallback ++ "</ops:default>" ++ + "</ops:switch>" + result = if "<math" `isPrefixOf` mathml then inOps else mathml + return $ HtmlInline result : xs +transformInlines _ _ _ (HtmlInline _ : xs) = return $ Str "" : xs +transformInlines _ _ _ (Link lab (_,_) : xs) = return $ lab ++ xs +transformInlines _ _ _ xs = return xs + +transformBlock :: Block -> Block +transformBlock (RawHtml _) = Null +transformBlock x = x + +(!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element +(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) + +-- | Version of 'ppTopElement' that specifies UTF-8 encoding. +ppTopElement :: Element -> String +ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . ppElement + +imageTypeOf :: FilePath -> Maybe String +imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of + "jpg" -> Just "image/jpeg" + "jpeg" -> Just "image/jpeg" + "jfif" -> Just "image/jpeg" + "png" -> Just "image/png" + "gif" -> Just "image/gif" + "svg" -> Just "image/svg+xml" + _ -> Nothing + +pageTemplate :: String +pageTemplate = unlines + [ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + , "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" + , "<html xmlns=\"http://www.w3.org/1999/xhtml\">" + , "<head>" + , "<title>$title$</title>" + , "<link href=\"stylesheet.css\" type=\"text/css\" rel=\"stylesheet\" />" + , "</head>" + , "<body>" + , "$if(titlepage)$" + , "<h1 class=\"title\">$title$</h1>" + , "$for(author)$" + , "<h2 class=\"author\">$author$</h2>" + , "$endfor$" + , "$else$" + , "<h1>$title$</h1>" + , "$body$" + , "$endif$" + , "</body>" + , "</html>" + ] + diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 299471328..08cd18ad0 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -285,9 +285,12 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do attrs = [theclass (unwords classes') | not (null classes')] ++ [prefixedId opts id' | not (null id')] ++ map (\(x,y) -> strAttr x y) keyvals + addBird = if "literate" `elem` classes' + then unlines . map ("> " ++) . lines + else unlines . lines in return $ pre ! attrs $ thecode << (replicate (length leadingBreaks) br +++ - [stringToHtml $ rawCode' ++ "\n"]) + [stringToHtml $ addBird rawCode']) Right h -> modify (\st -> st{ stHighlighting = True }) >> return h blockToHtml opts (BlockQuote blocks) = -- in S5, treat list in blockquote specially diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 8aa028bd7..720c00ac8 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -338,7 +338,7 @@ inlineToLaTeX (Link txt (src, _)) = char '}' inlineToLaTeX (Image _ (source, _)) = do modify $ \s -> s{ stGraphics = True } - return $ text $ "\\includegraphics{" ++ source ++ "}" + return $ text $ "\\includegraphics{" ++ source ++ "}" inlineToLaTeX (Note contents) = do st <- get put (st {stInNote = True}) diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 77dead196..c74cd81f9 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -32,6 +32,7 @@ module Text.Pandoc.Writers.Man ( writeMan) where import Text.Pandoc.Definition import Text.Pandoc.Templates import Text.Pandoc.Shared +import Text.Pandoc.Readers.TeXMath import Text.Printf ( printf ) import Data.List ( isPrefixOf, intersperse, intercalate ) import Text.PrettyPrint.HughesPJ hiding ( Str ) @@ -301,9 +302,9 @@ inlineToMan _ Ellipses = return $ text "\\&..." inlineToMan _ (Code str) = return $ text $ "\\f[B]" ++ escapeCode str ++ "\\f[]" inlineToMan _ (Str str) = return $ text $ escapeString str -inlineToMan opts (Math InlineMath str) = inlineToMan opts (Code str) +inlineToMan opts (Math InlineMath str) = inlineListToMan opts $ readTeXMath str inlineToMan opts (Math DisplayMath str) = do - contents <- inlineToMan opts (Code str) + contents <- inlineListToMan opts $ readTeXMath str return $ text ".RS" $$ contents $$ text ".RE" inlineToMan _ (TeX _) = return empty inlineToMan _ (HtmlInline _) = return empty diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 29253ec8e..d6cd2a296 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -32,7 +32,8 @@ Markdown: <http://daringfireball.net/projects/markdown/> module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where import Text.Pandoc.Definition import Text.Pandoc.Templates (renderTemplate) -import Text.Pandoc.Shared +import Text.Pandoc.Shared +import Text.Pandoc.Parsing import Text.Pandoc.Blocks import Text.ParserCombinators.Parsec ( runParser, GenParser ) import Data.List ( group, isPrefixOf, find, intersperse, transpose ) @@ -40,7 +41,7 @@ import Text.PrettyPrint.HughesPJ hiding ( Str ) import Control.Monad.State type Notes = [[Block]] -type Refs = KeyTable +type Refs = [([Inline], Target)] data WriterState = WriterState { stNotes :: Notes , stRefs :: Refs , stPlain :: Bool } @@ -94,7 +95,7 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do st <- get notes' <- notesToMarkdown opts (reverse $ stNotes st) st' <- get -- note that the notes may contain refs - refs' <- keyTableToMarkdown opts (reverse $ stRefs st') + refs' <- refsToMarkdown opts (reverse $ stRefs st') let main = render $ body $+$ text "" $+$ notes' $+$ text "" $+$ refs' let context = writerVariables opts ++ [ ("toc", render toc) @@ -109,8 +110,8 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do else return main -- | Return markdown representation of reference key table. -keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc -keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat +refsToMarkdown :: WriterOptions -> Refs -> State WriterState Doc +refsToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat -- | Return markdown representation of a reference key. keyToMarkdown :: WriterOptions @@ -238,7 +239,7 @@ blockToMarkdown opts (Table caption aligns widths headers rows) = do caption' <- inlineListToMarkdown opts caption let caption'' = if null caption then empty - else text "" $+$ (text "Table: " <> caption') + else text "" $+$ (text ": " <> caption') headers' <- mapM (blockListToMarkdown opts) headers let alignHeader alignment = case alignment of AlignLeft -> leftAlignBlock @@ -372,14 +373,14 @@ inlineToMarkdown opts (Subscript lst) = do inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst - return $ char '\'' <> contents <> char '\'' + return $ char '‘' <> contents <> char '’' inlineToMarkdown opts (Quoted DoubleQuote lst) = do contents <- inlineListToMarkdown opts lst - return $ char '"' <> contents <> char '"' -inlineToMarkdown _ EmDash = return $ text "--" -inlineToMarkdown _ EnDash = return $ char '-' -inlineToMarkdown _ Apostrophe = return $ char '\'' -inlineToMarkdown _ Ellipses = return $ text "..." + return $ char '“' <> contents <> char '”' +inlineToMarkdown _ EmDash = return $ char '\8212' +inlineToMarkdown _ EnDash = return $ char '\8211' +inlineToMarkdown _ Apostrophe = return $ char '\8217' +inlineToMarkdown _ Ellipses = return $ char '\8230' inlineToMarkdown _ (Code str) = let tickGroups = filter (\s -> '`' `elem` s) $ group str longest = if null tickGroups diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs new file mode 100644 index 000000000..3b5ea7481 --- /dev/null +++ b/src/Text/Pandoc/Writers/Native.hs @@ -0,0 +1,86 @@ +{- +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Native + Copyright : Copyright (C) 2006-2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Utility functions and definitions used by the various Pandoc modules. +-} +module Text.Pandoc.Writers.Native ( writeNative ) +where +import Text.Pandoc.Shared ( WriterOptions ) +import Data.List ( intercalate ) +import Text.Pandoc.Definition + +-- | Indent string as a block. +indentBy :: Int -- ^ Number of spaces to indent the block + -> Int -- ^ Number of spaces (rel to block) to indent first line + -> String -- ^ Contents of block to indent + -> String +indentBy _ _ [] = "" +indentBy num first str = + let (firstLine:restLines) = lines str + firstLineIndent = num + first + in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ + (intercalate "\n" $ map ((replicate num ' ') ++ ) restLines) + +-- | Prettyprint list of Pandoc blocks elements. +prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks + -> [Block] -- ^ List of blocks + -> String +prettyBlockList indent [] = indentBy indent 0 "[]" +prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ + (intercalate "\n, " (map prettyBlock blocks)) ++ " ]" + +-- | Prettyprint Pandoc block element. +prettyBlock :: Block -> String +prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ + (prettyBlockList 2 blocks) +prettyBlock (OrderedList attribs blockLists) = + "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++ + (intercalate ", " $ map (\blocks -> prettyBlockList 2 blocks) + blockLists)) ++ " ]" +prettyBlock (BulletList blockLists) = "BulletList\n" ++ + indentBy 2 0 ("[ " ++ (intercalate ", " + (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" +prettyBlock (DefinitionList items) = "DefinitionList\n" ++ + indentBy 2 0 ("[ " ++ (intercalate "\n, " + (map (\(term, defs) -> "(" ++ show term ++ ",\n" ++ + indentBy 3 0 ("[ " ++ (intercalate ", " + (map (\blocks -> prettyBlockList 2 blocks) defs)) ++ "]") ++ + ")") items))) ++ " ]" +prettyBlock (Table caption aligns widths header rows) = + "Table " ++ show caption ++ " " ++ show aligns ++ " " ++ + show widths ++ "\n" ++ prettyRow header ++ " [\n" ++ + (intercalate ",\n" (map prettyRow rows)) ++ " ]" + where prettyRow cols = indentBy 2 0 ("[ " ++ (intercalate ", " + (map (\blocks -> prettyBlockList 2 blocks) + cols))) ++ " ]" +prettyBlock block = show block + +-- | Prettyprint Pandoc document. +writeNative :: WriterOptions -> Pandoc -> String +writeNative _ (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++ + ")\n" ++ (prettyBlockList 0 blocks) ++ "\n" + diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs new file mode 100644 index 000000000..5aa0fd310 --- /dev/null +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -0,0 +1,83 @@ +{- +Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.ODT + Copyright : Copyright (C) 2008-2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to ODT. +-} +module Text.Pandoc.Writers.ODT ( writeODT ) where +import Data.IORef +import System.FilePath ( (</>), takeExtension ) +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Lazy.UTF8 ( fromString ) +import Codec.Archive.Zip +import System.Time +import Paths_pandoc ( getDataFileName ) +import Text.Pandoc.Shared ( WriterOptions(..) ) +import Text.Pandoc.Definition +import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) +import System.Directory +import Control.Monad (liftM) + +-- | Produce an ODT file from a Pandoc document. +writeODT :: Maybe FilePath -- ^ Path specified by --reference-odt + -> WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> IO B.ByteString +writeODT mbRefOdt opts doc = do + let datadir = writerUserDataDir opts + refArchive <- liftM toArchive $ + case mbRefOdt of + Just f -> B.readFile f + Nothing -> do + let defaultODT = getDataFileName "reference.odt" >>= B.readFile + case datadir of + Nothing -> defaultODT + Just d -> do + exists <- doesFileExist (d </> "reference.odt") + if exists + then B.readFile (d </> "reference.odt") + else defaultODT + -- handle pictures + picEntriesRef <- newIORef ([] :: [Entry]) + let sourceDir = writerSourceDirectory opts + doc' <- processWithM (transformPic sourceDir picEntriesRef) doc + let newContents = writeOpenDocument opts doc' + (TOD epochtime _) <- getClockTime + let contentEntry = toEntry "content.xml" epochtime $ fromString newContents + picEntries <- readIORef picEntriesRef + let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries) + return $ fromArchive archive + +transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline +transformPic sourceDir entriesRef (Image lab (src,tit)) = do + entries <- readIORef entriesRef + let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src + catch (readEntry [] (sourceDir </> src) >>= \entry -> + modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >> + return (Image lab (newsrc, tit))) + (\_ -> return (Emph lab)) +transformPic _ _ x = return x + diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index f4dfb2aa6..14566252c 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -39,10 +39,12 @@ import Text.PrettyPrint.HughesPJ hiding ( Str ) import Control.Monad.State import Control.Applicative ( (<$>) ) +type Refs = [([Inline], Target)] + data WriterState = WriterState { stNotes :: [[Block]] - , stLinks :: KeyTable - , stImages :: KeyTable + , stLinks :: Refs + , stImages :: Refs , stHasMath :: Bool , stOptions :: WriterOptions } @@ -65,8 +67,8 @@ pandocToRST (Pandoc (Meta tit auth dat) blocks) = do body <- blockListToRST blocks notes <- liftM (reverse . stNotes) get >>= notesToRST -- note that the notes may contain refs, so we do them first - refs <- liftM (reverse . stLinks) get >>= keyTableToRST - pics <- liftM (reverse . stImages) get >>= pictTableToRST + refs <- liftM (reverse . stLinks) get >>= refsToRST + pics <- liftM (reverse . stImages) get >>= pictRefsToRST hasMath <- liftM stHasMath get let main = render $ body $+$ notes $+$ text "" $+$ refs $+$ pics let context = writerVariables opts ++ @@ -80,8 +82,8 @@ pandocToRST (Pandoc (Meta tit auth dat) blocks) = do else return main -- | Return RST representation of reference key table. -keyTableToRST :: KeyTable -> State WriterState Doc -keyTableToRST refs = mapM keyToRST refs >>= return . vcat +refsToRST :: Refs -> State WriterState Doc +refsToRST refs = mapM keyToRST refs >>= return . vcat -- | Return RST representation of a reference key. keyToRST :: ([Inline], (String, String)) @@ -107,8 +109,8 @@ noteToRST num note = do return $ marker $$ nest 3 contents -- | Return RST representation of picture reference table. -pictTableToRST :: KeyTable -> State WriterState Doc -pictTableToRST refs = mapM pictToRST refs >>= return . vcat +pictRefsToRST :: Refs -> State WriterState Doc +pictRefsToRST refs = mapM pictToRST refs >>= return . vcat -- | Return RST representation of a picture substitution reference. pictToRST :: ([Inline], (String, String)) @@ -280,16 +282,16 @@ inlineToRST (Subscript lst) = do inlineToRST (SmallCaps lst) = inlineListToRST lst inlineToRST (Quoted SingleQuote lst) = do contents <- inlineListToRST lst - return $ char '\'' <> contents <> char '\'' + return $ char '‘' <> contents <> char '’' inlineToRST (Quoted DoubleQuote lst) = do contents <- inlineListToRST lst - return $ char '"' <> contents <> char '"' + return $ char '“' <> contents <> char '”' inlineToRST (Cite _ lst) = inlineListToRST lst -inlineToRST EmDash = return $ text "--" -inlineToRST EnDash = return $ char '-' -inlineToRST Apostrophe = return $ char '\'' -inlineToRST Ellipses = return $ text "..." +inlineToRST EmDash = return $ char '\8212' +inlineToRST EnDash = return $ char '\8211' +inlineToRST Apostrophe = return $ char '\8217' +inlineToRST Ellipses = return $ char '\8230' inlineToRST (Code str) = return $ text $ "``" ++ str ++ "``" inlineToRST (Str str) = return $ text $ escapeString str inlineToRST (Math t str) = do diff --git a/src/markdown2pdf.hs b/src/markdown2pdf.hs index d713ae263..cc6a034c0 100644 --- a/src/markdown2pdf.hs +++ b/src/markdown2pdf.hs @@ -9,14 +9,7 @@ import Control.Exception (tryJust, bracket) import System.IO (stderr) import System.IO.Error (isDoesNotExistError) import System.Environment ( getArgs, getProgName ) --- Note: ghc >= 6.12 (base >=4.2) supports unicode through iconv --- So we use System.IO.UTF8 only if we have an earlier version -#if MIN_VERSION_base(4,2,0) -import System.IO (hPutStrLn) -#else -import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents ) -import System.IO.UTF8 -#endif +import qualified Text.Pandoc.UTF8 as UTF8 import System.Exit (ExitCode (..), exitWith) import System.FilePath import System.Directory @@ -57,7 +50,7 @@ runLatexRaw latexProgram file = do takeDirectory file, dropExtension file] >> return () let pdfFile = replaceExtension file "pdf" let logFile = replaceExtension file "log" - txt <- tryJust (guard . isDoesNotExistError) (readFile logFile) + txt <- tryJust (guard . isDoesNotExistError) (UTF8.readFile logFile) let checks = checkLatex $ either (const "") id txt case checks of -- err , bib , ref , msg @@ -122,13 +115,13 @@ runBibtex file = do exit :: String -> IO a exit x = do progName <- getProgName - hPutStrLn stderr $ progName ++ ": " ++ x + UTF8.hPutStrLn stderr $ progName ++ ": " ++ x exitWith $ ExitFailure 1 saveStdin :: FilePath -> IO (Either String FilePath) saveStdin file = do - text <- getContents - writeFile file text + text <- UTF8.getContents + UTF8.writeFile file text fileExist <- doesFileExist file case fileExist of False -> return $ Left $! "Could not create " ++ file @@ -137,7 +130,7 @@ saveStdin file = do saveOutput :: FilePath -> FilePath -> IO () saveOutput input output = do copyFile input output - hPutStrLn stderr $! "Created " ++ output + UTF8.hPutStrLn stderr $! "Created " ++ output main :: IO () main = bracket @@ -161,7 +154,8 @@ main = bracket "--number-sections","--include-in-header", "--include-before-body","--include-after-body", "--custom-header","--output", - "--template", "--variable"] + "--template", "--variable", + "--csl", "--biblio", "--biblio-format"] let isOpt ('-':_) = True isOpt _ = False let opts = filter isOpt args @@ -170,8 +164,8 @@ main = bracket any (\o -> (o ++ "=") `isPrefixOf` x) goodoptslong unless (all isGoodopt opts) $ do (code, out, _err) <- readProcessWithExitCode "pandoc" ["--help"] "" - putStrLn "markdown2pdf [OPTIONS] [FILES]\nOptions:" - putStr $ unlines $ + UTF8.putStrLn "markdown2pdf [OPTIONS] [FILES]\nOptions:" + UTF8.putStr $ unlines $ filter (\l -> any (`isInfixOf` l) goodoptslong) $ lines out exitWith code diff --git a/src/pandoc.hs b/src/pandoc.hs index 0560efc0a..a2be60d52 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -30,9 +30,9 @@ writers. -} module Main where import Text.Pandoc -import Text.Pandoc.ODT import Text.Pandoc.Writers.S5 (s5HeaderIncludes) -import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile ) +import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile, + headerShift ) #ifdef _HIGHLIGHTING import Text.Pandoc.Highlighting ( languages ) #endif @@ -45,14 +45,7 @@ import Data.Char ( toLower, isDigit ) import Data.List ( intercalate, isSuffixOf ) import System.Directory ( getAppUserDataDirectory ) import System.IO ( stdout, stderr ) --- Note: ghc >= 6.12 (base >=4.2) supports unicode through iconv --- So we use System.IO.UTF8 only if we have an earlier version -#if MIN_VERSION_base(4,2,0) -import System.IO ( hPutStr, hPutStrLn ) -#else -import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents ) -import System.IO.UTF8 -#endif +import qualified Text.Pandoc.UTF8 as UTF8 #ifdef _CITEPROC import Text.CSL import Text.Pandoc.Biblio @@ -60,7 +53,9 @@ import Text.Pandoc.Biblio import Control.Monad (when, unless, liftM) import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) import Network.URI (parseURI, isURI) -import Data.ByteString.Lazy.UTF8 (toString) +import qualified Data.ByteString.Lazy as B +import Data.ByteString.Lazy.UTF8 (toString, fromString) +import Codec.Binary.UTF8.String (decodeString) copyrightMessage :: String copyrightMessage = "\nCopyright (C) 2006-2010 John MacFarlane\n" ++ @@ -108,13 +103,14 @@ readPandoc _ = read -- | Association list of formats and writers. writers :: [ ( String, WriterOptions -> Pandoc -> String ) ] -writers = [("native" , writeDoc) +writers = [("native" , writeNative) ,("html" , writeHtmlString) ,("html+lhs" , writeHtmlString) ,("s5" , writeS5String) ,("docbook" , writeDocbook) ,("opendocument" , writeOpenDocument) - ,("odt" , writeOpenDocument) + ,("odt" , \_ _ -> "") + ,("epub" , \_ _ -> "") ,("latex" , writeLaTeX) ,("latex+lhs" , writeLaTeX) ,("context" , writeConTeXt) @@ -130,17 +126,7 @@ writers = [("native" , writeDoc) ] isNonTextOutput :: String -> Bool -isNonTextOutput = (`elem` ["odt"]) - --- | Writer for Pandoc native format. -writeDoc :: WriterOptions -> Pandoc -> String -writeDoc _ = prettyPandoc - -headerShift :: Int -> Pandoc -> Pandoc -headerShift n = processWith shift - where shift :: Block -> Block - shift (Header level inner) = Header (level + n) inner - shift x = x +isNonTextOutput = (`elem` ["odt","epub"]) -- | Data structure for command line options. data Opt = Opt @@ -154,8 +140,6 @@ data Opt = Opt , optTransforms :: [Pandoc -> Pandoc] -- ^ Doc transforms to apply , optTemplate :: String -- ^ Custom template , optVariables :: [(String,String)] -- ^ Template variables to set - , optBefore :: [String] -- ^ Texts to include before body - , optAfter :: [String] -- ^ Texts to include after body , optOutputFile :: String -- ^ Name of output file , optNumberSections :: Bool -- ^ Number sections in LaTeX , optIncremental :: Bool -- ^ Use incremental lists in S5 @@ -163,6 +147,8 @@ data Opt = Opt , optSmart :: Bool -- ^ Use smart typography , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math , optReferenceODT :: Maybe FilePath -- ^ Path of reference.odt + , optEPUBStylesheet :: Maybe String -- ^ EPUB stylesheet + , optEPUBMetadata :: String -- ^ EPUB metadata , optDumpArgs :: Bool -- ^ Output command-line arguments , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments , optStrict :: Bool -- ^ Use strict markdown syntax @@ -194,8 +180,6 @@ defaultOpts = Opt , optTransforms = [] , optTemplate = "" , optVariables = [] - , optBefore = [] - , optAfter = [] , optOutputFile = "-" -- "-" means stdout , optNumberSections = False , optIncremental = False @@ -203,6 +187,8 @@ defaultOpts = Opt , optSmart = False , optHTMLMathMethod = PlainMath , optReferenceODT = Nothing + , optEPUBStylesheet = Nothing + , optEPUBMetadata = "" , optDumpArgs = False , optIgnoreArgs = False , optStrict = False @@ -343,7 +329,7 @@ options = "references" -> return ReferenceObfuscation "javascript" -> return JavascriptObfuscation "none" -> return NoObfuscation - _ -> hPutStrLn stderr ("Error: Unknown obfuscation method: " ++ arg) >> + _ -> UTF8.hPutStrLn stderr ("Error: Unknown obfuscation method: " ++ arg) >> exitWith (ExitFailure 6) return opt { optEmailObfuscation = method }) "none|javascript|references") @@ -377,7 +363,7 @@ options = return opt{ optTransforms = headerShift shift : oldTransforms } else do - hPutStrLn stderr $ "base-header-level must be a number >= 1" + UTF8.hPutStrLn stderr $ "base-header-level must be a number >= 1" exitWith $ ExitFailure 19) "LEVEL") "" -- "Headers base level" @@ -385,7 +371,7 @@ options = , Option "" ["template"] (ReqArg (\arg opt -> do - text <- readFile arg + text <- UTF8.readFile arg return opt{ optTemplate = text, optStandalone = True }) "FILENAME") @@ -399,7 +385,7 @@ options = let newvars = optVariables opt ++ [(k,v)] return opt{ optVariables = newvars } _ -> do - hPutStrLn stderr $ "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)" + UTF8.hPutStrLn stderr $ "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)" exitWith $ ExitFailure 17) "FILENAME") "" -- "Use custom template" @@ -417,7 +403,7 @@ options = , Option "H" ["include-in-header"] (ReqArg (\arg opt -> do - text <- readFile arg + text <- UTF8.readFile arg -- add new ones to end, so they're included in order specified let newvars = optVariables opt ++ [("header-includes",text)] return opt { optVariables = newvars, @@ -428,7 +414,7 @@ options = , Option "B" ["include-before-body"] (ReqArg (\arg opt -> do - text <- readFile arg + text <- UTF8.readFile arg -- add new ones to end, so they're included in order specified let newvars = optVariables opt ++ [("include-before",text)] return opt { optVariables = newvars, @@ -439,7 +425,7 @@ options = , Option "A" ["include-after-body"] (ReqArg (\arg opt -> do - text <- readFile arg + text <- UTF8.readFile arg -- add new ones to end, so they're included in order specified let newvars = optVariables opt ++ [("include-after",text)] return opt { optVariables = newvars, @@ -450,7 +436,7 @@ options = , Option "C" ["custom-header"] (ReqArg (\arg opt -> do - text <- readFile arg + text <- UTF8.readFile arg let newVars = ("legacy-header", text) : optVariables opt return opt { optVariables = newVars , optStandalone = True }) @@ -473,12 +459,28 @@ options = "FILENAME") "" -- "Path of custom reference.odt" + , Option "" ["epub-stylesheet"] + (ReqArg + (\arg opt -> do + text <- UTF8.readFile arg + return opt { optEPUBStylesheet = Just text }) + "FILENAME") + "" -- "Path of epub.css" + + , Option "" ["epub-metadata"] + (ReqArg + (\arg opt -> do + text <- UTF8.readFile arg + return opt { optEPUBMetadata = text }) + "FILENAME") + "" -- "Path of epub metadata file" + , Option "D" ["print-default-template"] (ReqArg (\arg _ -> do templ <- getDefaultTemplate Nothing arg case templ of - Right t -> hPutStr stdout t + Right t -> UTF8.hPutStr stdout t Left e -> error $ show e exitWith ExitSuccess) "FORMAT") @@ -520,7 +522,7 @@ options = (NoArg (\_ -> do prg <- getProgName - hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ compileInfo ++ + UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ compileInfo ++ copyrightMessage) exitWith ExitSuccess )) "" -- "Print version" @@ -529,7 +531,7 @@ options = (NoArg (\_ -> do prg <- getProgName - hPutStr stdout (usageMessage prg options) + UTF8.hPutStr stdout (usageMessage prg options) exitWith ExitSuccess )) "" -- "Show help" ] @@ -586,13 +588,14 @@ defaultWriterName x = ".texinfo" -> "texinfo" ".db" -> "docbook" ".odt" -> "odt" + ".epub" -> "epub" ['.',y] | y `elem` ['1'..'9'] -> "man" _ -> "html" main :: IO () main = do - rawArgs <- getArgs + rawArgs <- liftM (map decodeString) getArgs prg <- getProgName let compatMode = (prg == "hsmarkdown") @@ -602,8 +605,8 @@ main = do unless (null errors) $ do name <- getProgName - mapM_ (\e -> hPutStr stderr (name ++ ": ") >> hPutStr stderr e) errors - hPutStrLn stderr $ "Try " ++ name ++ " --help for more information." + mapM_ (\e -> UTF8.hPutStr stderr (name ++ ": ") >> UTF8.hPutStr stderr e) errors + UTF8.hPutStrLn stderr $ "Try " ++ name ++ " --help for more information." exitWith $ ExitFailure 2 let defaultOpts' = if compatMode @@ -622,8 +625,6 @@ main = do , optWriter = writerName , optParseRaw = parseRaw , optVariables = variables - , optBefore = befores - , optAfter = afters , optTableOfContents = toc , optTransforms = transforms , optTemplate = template @@ -634,6 +635,8 @@ main = do , optSmart = smart , optHTMLMathMethod = mathMethod , optReferenceODT = referenceODT + , optEPUBStylesheet = epubStylesheet + , optEPUBMetadata = epubMetadata , optDumpArgs = dumpArgs , optIgnoreArgs = ignoreArgs , optStrict = strict @@ -652,13 +655,13 @@ main = do } = opts when dumpArgs $ - do hPutStrLn stdout outputFile - mapM_ (\arg -> hPutStrLn stdout arg) args + do UTF8.hPutStrLn stdout outputFile + mapM_ (\arg -> UTF8.hPutStrLn stdout arg) args exitWith ExitSuccess -- warn about deprecated options case lookup "legacy-header" variables of - Just _ -> hPutStrLn stderr $ + Just _ -> UTF8.hPutStrLn stderr $ "Warning: The -C/--custom-header is deprecated.\n" ++ "Please transition to using --template instead." Nothing -> return () @@ -687,9 +690,13 @@ main = do Just r -> return r Nothing -> error ("Unknown reader: " ++ readerName') - writer <- case (lookup writerName' writers) of - Just r -> return r - Nothing -> error ("Unknown writer: " ++ writerName') + let writer = case lookup writerName' writers of + Just _ | writerName' == "epub" -> writeEPUB epubStylesheet + Just _ | writerName' == "odt" -> writeODT referenceODT + Just r -> \o -> + return . fromString . r o + Nothing -> error $ "Unknown writer: " ++ + writerName' templ <- getDefaultTemplate datadir writerName' let defaultTemplate = case templ of @@ -722,6 +729,10 @@ main = do return $ ("mathml-script", s) : variables' _ -> return variables' + let sourceDir = if null sources + then "." + else takeDirectory (head sources) + let startParserState = defaultParserState { stateParseRaw = parseRaw, stateTabStop = tabStop, @@ -737,13 +748,13 @@ main = do stateColumns = columns, stateStrict = strict, stateIndentedCodeClasses = codeBlockClasses } + let writerOptions = WriterOptions { writerStandalone = standalone', writerTemplate = if null template then defaultTemplate else template, writerVariables = variables'', - writerIncludeBefore = concat befores, - writerIncludeAfter = concat afters, + writerEPUBMetadata = epubMetadata, writerTabStop = tabStop, writerTableOfContents = toc && writerName' /= "s5", @@ -761,23 +772,21 @@ main = do writerEmailObfuscation = if strict then ReferenceObfuscation else obfuscationMethod, - writerIdentifierPrefix = idPrefix } + writerIdentifierPrefix = idPrefix, + writerSourceDirectory = sourceDir, + writerUserDataDir = datadir } when (isNonTextOutput writerName' && outputFile == "-") $ - do hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++ + do UTF8.hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++ "Specify an output file using the -o option.") exitWith $ ExitFailure 5 - let sourceDirRelative = if null sources - then "" - else takeDirectory (head sources) - let readSources [] = mapM readSource ["-"] readSources srcs = mapM readSource srcs - readSource "-" = getContents + readSource "-" = UTF8.getContents readSource src = case parseURI src of Just u -> readURI u - Nothing -> readFile src + Nothing -> UTF8.readFile src readURI uri = simpleHTTP (mkRequest GET uri) >>= getResponseBody >>= return . toString -- treat all as UTF8 @@ -794,10 +803,8 @@ main = do return doc' #endif - let writerOutput = writer writerOptions doc'' ++ "\n" + writerOutput <- writer writerOptions doc'' - case writerName' of - "odt" -> saveOpenDocumentAsODT datadir outputFile sourceDirRelative referenceODT writerOutput - _ -> if outputFile == "-" - then putStr writerOutput - else writeFile outputFile writerOutput + if outputFile == "-" + then B.putStrLn writerOutput + else B.writeFile outputFile writerOutput |