aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc.hs8
-rw-r--r--src/Text/Pandoc/Definition.hs18
-rw-r--r--src/Text/Pandoc/ODT.hs102
-rw-r--r--src/Text/Pandoc/Parsing.hs698
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs3
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs14
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs106
-rw-r--r--src/Text/Pandoc/Readers/RST.hs176
-rw-r--r--src/Text/Pandoc/Readers/TeXMath.hs250
-rw-r--r--src/Text/Pandoc/Shared.hs735
-rw-r--r--src/Text/Pandoc/Templates.hs2
-rw-r--r--src/Text/Pandoc/UTF8.hs72
-rw-r--r--src/Text/Pandoc/UUID.hs77
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs283
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs5
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs2
-rw-r--r--src/Text/Pandoc/Writers/Man.hs5
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs25
-rw-r--r--src/Text/Pandoc/Writers/Native.hs86
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs83
-rw-r--r--src/Text/Pandoc/Writers/RST.hs30
-rw-r--r--src/markdown2pdf.hs26
-rw-r--r--src/pandoc.hs139
23 files changed, 1659 insertions, 1286 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 9cad5fb34..2b6474b9f 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -71,6 +71,7 @@ module Text.Pandoc
, NoteTable
, HeaderType (..)
-- * Writers: converting /from/ Pandoc format
+ , writeNative
, writeMarkdown
, writePlain
, writeRST
@@ -86,7 +87,8 @@ module Text.Pandoc
, writeMan
, writeMediaWiki
, writeRTF
- , prettyPandoc
+ , writeODT
+ , writeEPUB
-- * Writer options used in writers
, WriterOptions (..)
, HTMLMathMethod (..)
@@ -102,6 +104,7 @@ import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.RST
import Text.Pandoc.Readers.LaTeX
import Text.Pandoc.Readers.HTML
+import Text.Pandoc.Writers.Native
import Text.Pandoc.Writers.Markdown
import Text.Pandoc.Writers.RST
import Text.Pandoc.Writers.LaTeX
@@ -109,12 +112,15 @@ import Text.Pandoc.Writers.ConTeXt
import Text.Pandoc.Writers.Texinfo
import Text.Pandoc.Writers.HTML
import Text.Pandoc.Writers.S5
+import Text.Pandoc.Writers.ODT
+import Text.Pandoc.Writers.EPUB
import Text.Pandoc.Writers.Docbook
import Text.Pandoc.Writers.OpenDocument
import Text.Pandoc.Writers.Man
import Text.Pandoc.Writers.RTF
import Text.Pandoc.Writers.MediaWiki
import Text.Pandoc.Templates
+import Text.Pandoc.Parsing
import Text.Pandoc.Shared
import Data.Version (showVersion)
import Paths_pandoc (version)
diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs
index c8ba9249b..fffca3b2e 100644
--- a/src/Text/Pandoc/Definition.hs
+++ b/src/Text/Pandoc/Definition.hs
@@ -33,19 +33,19 @@ module Text.Pandoc.Definition where
import Data.Generics
-data Pandoc = Pandoc Meta [Block] deriving (Eq, Read, Show, Typeable, Data)
+data Pandoc = Pandoc Meta [Block] deriving (Eq, Ord, Read, Show, Typeable, Data)
-- | Bibliographic information for the document: title, authors, date.
data Meta = Meta { docTitle :: [Inline]
, docAuthors :: [[Inline]]
, docDate :: [Inline] }
- deriving (Eq, Show, Read, Typeable, Data)
+ deriving (Eq, Ord, Show, Read, Typeable, Data)
-- | Alignment of a table column.
data Alignment = AlignLeft
| AlignRight
| AlignCenter
- | AlignDefault deriving (Eq, Show, Read, Typeable, Data)
+ | AlignDefault deriving (Eq, Ord, Show, Read, Typeable, Data)
-- | List attributes.
type ListAttributes = (Int, ListNumberStyle, ListNumberDelim)
@@ -57,13 +57,13 @@ data ListNumberStyle = DefaultStyle
| LowerRoman
| UpperRoman
| LowerAlpha
- | UpperAlpha deriving (Eq, Show, Read, Typeable, Data)
+ | UpperAlpha deriving (Eq, Ord, Show, Read, Typeable, Data)
-- | Delimiter of list numbers.
data ListNumberDelim = DefaultDelim
| Period
| OneParen
- | TwoParens deriving (Eq, Show, Read, Typeable, Data)
+ | TwoParens deriving (Eq, Ord, Show, Read, Typeable, Data)
-- | Attributes: identifier, classes, key-value pairs
type Attr = (String, [String], [(String, String)])
@@ -91,16 +91,16 @@ data Block
-- column headers (each a list of blocks), and
-- rows (each a list of lists of blocks)
| Null -- ^ Nothing
- deriving (Eq, Read, Show, Typeable, Data)
+ deriving (Eq, Ord, Read, Show, Typeable, Data)
-- | Type of quotation marks to use in Quoted inline.
-data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Read, Typeable, Data)
+data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Ord, Read, Typeable, Data)
-- | Link target (URL, title).
type Target = (String, String)
-- | Type of math element (display or inline).
-data MathType = DisplayMath | InlineMath deriving (Show, Eq, Read, Typeable, Data)
+data MathType = DisplayMath | InlineMath deriving (Show, Eq, Ord, Read, Typeable, Data)
-- | Inline elements.
data Inline
@@ -127,7 +127,7 @@ data Inline
| Image [Inline] Target -- ^ Image: alt text (list of inlines), target
-- and target
| Note [Block] -- ^ Footnote or endnote
- deriving (Show, Eq, Read, Typeable, Data)
+ deriving (Show, Eq, Ord, Read, Typeable, Data)
-- | Applies a transformation on @a@s to matching elements in a @b@.
processWith :: (Data a, Data b) => (a -> a) -> b -> b
diff --git a/src/Text/Pandoc/ODT.hs b/src/Text/Pandoc/ODT.hs
deleted file mode 100644
index d978c0cb4..000000000
--- a/src/Text/Pandoc/ODT.hs
+++ /dev/null
@@ -1,102 +0,0 @@
-{-
-Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.ODT
- Copyright : Copyright (C) 2008-2010 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Functions for producing an ODT file from OpenDocument XML.
--}
-module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where
-import Data.List ( find )
-import System.FilePath ( (</>), takeFileName )
-import qualified Data.ByteString.Lazy as B
-import Data.ByteString.Lazy.UTF8 ( fromString )
-import Prelude hiding ( writeFile, readFile )
-import Codec.Archive.Zip
-import Control.Applicative ( (<$>) )
-import Text.ParserCombinators.Parsec
-import System.Time
-import Paths_pandoc ( getDataFileName )
-import System.Directory
-import Control.Monad (liftM)
-
--- | Produce an ODT file from OpenDocument XML.
-saveOpenDocumentAsODT :: Maybe FilePath -- ^ Path of user data directory
- -> FilePath -- ^ Pathname of ODT file to be produced
- -> FilePath -- ^ Relative directory of source file
- -> Maybe FilePath -- ^ Path specified by --reference-odt
- -> String -- ^ OpenDocument XML contents
- -> IO ()
-saveOpenDocumentAsODT datadir destinationODTPath sourceDirRelative mbRefOdt xml = do
- refArchive <- liftM toArchive $
- case mbRefOdt of
- Just f -> B.readFile f
- Nothing -> do
- let defaultODT = getDataFileName "reference.odt" >>= B.readFile
- case datadir of
- Nothing -> defaultODT
- Just d -> do
- exists <- doesFileExist (d </> "reference.odt")
- if exists
- then B.readFile (d </> "reference.odt")
- else defaultODT
- -- handle pictures
- let (newContents, pics) =
- case runParser pPictures [] "OpenDocument XML contents" xml of
- Left err -> error $ show err
- Right x -> x
- picEntries <- mapM (makePictureEntry sourceDirRelative) pics
- (TOD epochTime _) <- getClockTime
- let contentEntry = toEntry "content.xml" epochTime $ fromString newContents
- let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries)
- B.writeFile destinationODTPath $ fromArchive archive
-
-makePictureEntry :: FilePath -- ^ Relative directory of source file
- -> (FilePath, String) -- ^ Path and new path of picture
- -> IO Entry
-makePictureEntry sourceDirRelative (path, newPath) = do
- entry <- readEntry [] $ sourceDirRelative </> path
- return (entry { eRelativePath = newPath })
-
-pPictures :: GenParser Char [(FilePath, String)] ([Char], [(FilePath, String)])
-pPictures = do
- contents <- concat <$> many (pPicture <|> many1 (noneOf "<") <|> string "<")
- pics <- getState
- return (contents, pics)
-
-pPicture :: GenParser Char [(FilePath, String)] [Char]
-pPicture = try $ do
- string "<draw:image xlink:href=\""
- path <- manyTill anyChar (char '"')
- let filename = takeFileName path
- pics <- getState
- newPath <- case find (\(o, _) -> o == path) pics of
- Just (_, new) -> return new
- Nothing -> do
- -- get a unique name
- let dups = length $ (filter (\(o, _) -> takeFileName o == filename)) pics
- let new = "Pictures/" ++ replicate dups '0' ++ filename
- updateState ((path, new) :)
- return new
- return $ "<draw:image xlink:href=\"" ++ newPath ++ "\""
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
new file mode 100644
index 000000000..19338661d
--- /dev/null
+++ b/src/Text/Pandoc/Parsing.hs
@@ -0,0 +1,698 @@
+{-
+Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Parsing
+ Copyright : Copyright (C) 2006-2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+A utility library with parsers used in pandoc readers.
+-}
+module Text.Pandoc.Parsing ( (>>~),
+ anyLine,
+ many1Till,
+ notFollowedBy',
+ oneOfStrings,
+ spaceChar,
+ skipSpaces,
+ blankline,
+ blanklines,
+ enclosed,
+ stringAnyCase,
+ parseFromString,
+ lineClump,
+ charsInBalanced,
+ charsInBalanced',
+ romanNumeral,
+ emailAddress,
+ uri,
+ withHorizDisplacement,
+ nullBlock,
+ failIfStrict,
+ failUnlessLHS,
+ escaped,
+ anyOrderedListMarker,
+ orderedListMarker,
+ charRef,
+ tableWith,
+ gridTableWith,
+ readWith,
+ testStringWith,
+ ParserState (..),
+ defaultParserState,
+ HeaderType (..),
+ ParserContext (..),
+ QuoteContext (..),
+ NoteTable,
+ KeyTable,
+ Key (..),
+ lookupKeySrc,
+ refsMatch )
+where
+
+import Text.Pandoc.Definition
+import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
+import Text.ParserCombinators.Parsec
+import Text.Pandoc.CharacterReferences ( characterReference )
+import Data.Char ( toLower, toUpper, ord, isAscii )
+import Data.List ( intercalate, transpose )
+import Network.URI ( parseURI, URI (..), isAllowedInURI )
+import Control.Monad ( join, liftM )
+import Text.Pandoc.Shared
+import qualified Data.Map as M
+
+-- | Like >>, but returns the operation on the left.
+-- (Suggested by Tillmann Rendel on Haskell-cafe list.)
+(>>~) :: (Monad m) => m a -> m b -> m a
+a >>~ b = a >>= \x -> b >> return x
+
+-- | Parse any line of text
+anyLine :: GenParser Char st [Char]
+anyLine = manyTill anyChar newline
+
+-- | Like @manyTill@, but reads at least one item.
+many1Till :: GenParser tok st a
+ -> GenParser tok st end
+ -> GenParser tok st [a]
+many1Till p end = do
+ first <- p
+ rest <- manyTill p end
+ return (first:rest)
+
+-- | A more general form of @notFollowedBy@. This one allows any
+-- type of parser to be specified, and succeeds only if that parser fails.
+-- It does not consume any input.
+notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
+notFollowedBy' p = try $ join $ do a <- try p
+ return (unexpected (show a))
+ <|>
+ return (return ())
+-- (This version due to Andrew Pimlott on the Haskell mailing list.)
+
+-- | Parses one of a list of strings (tried in order).
+oneOfStrings :: [String] -> GenParser Char st String
+oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
+
+-- | Parses a space or tab.
+spaceChar :: CharParser st Char
+spaceChar = char ' ' <|> char '\t'
+
+-- | Skips zero or more spaces or tabs.
+skipSpaces :: GenParser Char st ()
+skipSpaces = skipMany spaceChar
+
+-- | Skips zero or more spaces or tabs, then reads a newline.
+blankline :: GenParser Char st Char
+blankline = try $ skipSpaces >> newline
+
+-- | Parses one or more blank lines and returns a string of newlines.
+blanklines :: GenParser Char st [Char]
+blanklines = many1 blankline
+
+-- | Parses material enclosed between start and end parsers.
+enclosed :: GenParser Char st t -- ^ start parser
+ -> GenParser Char st end -- ^ end parser
+ -> GenParser Char st a -- ^ content parser (to be used repeatedly)
+ -> GenParser Char st [a]
+enclosed start end parser = try $
+ start >> notFollowedBy space >> many1Till parser end
+
+-- | Parse string, case insensitive.
+stringAnyCase :: [Char] -> CharParser st String
+stringAnyCase [] = string ""
+stringAnyCase (x:xs) = do
+ firstChar <- char (toUpper x) <|> char (toLower x)
+ rest <- stringAnyCase xs
+ return (firstChar:rest)
+
+-- | Parse contents of 'str' using 'parser' and return result.
+parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
+parseFromString parser str = do
+ oldPos <- getPosition
+ oldInput <- getInput
+ setInput str
+ result <- parser
+ setInput oldInput
+ setPosition oldPos
+ return result
+
+-- | Parse raw line block up to and including blank lines.
+lineClump :: GenParser Char st String
+lineClump = blanklines
+ <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
+
+-- | Parse a string of characters between an open character
+-- and a close character, including text between balanced
+-- pairs of open and close, which must be different. For example,
+-- @charsInBalanced '(' ')'@ will parse "(hello (there))"
+-- and return "hello (there)". Stop if a blank line is
+-- encountered.
+charsInBalanced :: Char -> Char -> GenParser Char st String
+charsInBalanced open close = try $ do
+ char open
+ raw <- many $ (many1 (noneOf [open, close, '\n']))
+ <|> (do res <- charsInBalanced open close
+ return $ [open] ++ res ++ [close])
+ <|> try (string "\n" >>~ notFollowedBy' blanklines)
+ char close
+ return $ concat raw
+
+-- | Like @charsInBalanced@, but allow blank lines in the content.
+charsInBalanced' :: Char -> Char -> GenParser Char st String
+charsInBalanced' open close = try $ do
+ char open
+ raw <- many $ (many1 (noneOf [open, close]))
+ <|> (do res <- charsInBalanced' open close
+ return $ [open] ++ res ++ [close])
+ char close
+ return $ concat raw
+
+-- Auxiliary functions for romanNumeral:
+
+lowercaseRomanDigits :: [Char]
+lowercaseRomanDigits = ['i','v','x','l','c','d','m']
+
+uppercaseRomanDigits :: [Char]
+uppercaseRomanDigits = map toUpper lowercaseRomanDigits
+
+-- | Parses a roman numeral (uppercase or lowercase), returns number.
+romanNumeral :: Bool -- ^ Uppercase if true
+ -> GenParser Char st Int
+romanNumeral upperCase = do
+ let romanDigits = if upperCase
+ then uppercaseRomanDigits
+ else lowercaseRomanDigits
+ lookAhead $ oneOf romanDigits
+ let [one, five, ten, fifty, hundred, fivehundred, thousand] =
+ map char romanDigits
+ thousands <- many thousand >>= (return . (1000 *) . length)
+ ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
+ fivehundreds <- many fivehundred >>= (return . (500 *) . length)
+ fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
+ hundreds <- many hundred >>= (return . (100 *) . length)
+ nineties <- option 0 $ try $ ten >> hundred >> return 90
+ fifties <- many fifty >>= (return . (50 *) . length)
+ forties <- option 0 $ try $ ten >> fifty >> return 40
+ tens <- many ten >>= (return . (10 *) . length)
+ nines <- option 0 $ try $ one >> ten >> return 9
+ fives <- many five >>= (return . (5 *) . length)
+ fours <- option 0 $ try $ one >> five >> return 4
+ ones <- many one >>= (return . length)
+ let total = thousands + ninehundreds + fivehundreds + fourhundreds +
+ hundreds + nineties + fifties + forties + tens + nines +
+ fives + fours + ones
+ if total == 0
+ then fail "not a roman numeral"
+ else return total
+
+-- Parsers for email addresses and URIs
+
+emailChar :: GenParser Char st Char
+emailChar = alphaNum <|> oneOf "-+_."
+
+domainChar :: GenParser Char st Char
+domainChar = alphaNum <|> char '-'
+
+domain :: GenParser Char st [Char]
+domain = do
+ first <- many1 domainChar
+ dom <- many1 $ try (char '.' >> many1 domainChar )
+ return $ intercalate "." (first:dom)
+
+-- | Parses an email address; returns original and corresponding
+-- escaped mailto: URI.
+emailAddress :: GenParser Char st (String, String)
+emailAddress = try $ do
+ firstLetter <- alphaNum
+ restAddr <- many emailChar
+ let addr = firstLetter:restAddr
+ char '@'
+ dom <- domain
+ let full = addr ++ '@':dom
+ return (full, escapeURI $ "mailto:" ++ full)
+
+-- | Parses a URI. Returns pair of original and URI-escaped version.
+uri :: GenParser Char st (String, String)
+uri = try $ do
+ let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:",
+ "news:", "telnet:" ]
+ lookAhead $ oneOfStrings protocols
+ -- scan non-ascii characters and ascii characters allowed in a URI
+ str <- many1 $ satisfy (\c -> not (isAscii c) || isAllowedInURI c)
+ -- now see if they amount to an absolute URI
+ case parseURI (escapeURI str) of
+ Just uri' -> if uriScheme uri' `elem` protocols
+ then return (str, show uri')
+ else fail "not a URI"
+ Nothing -> fail "not a URI"
+
+-- | Applies a parser, returns tuple of its results and its horizontal
+-- displacement (the difference between the source column at the end
+-- and the source column at the beginning). Vertical displacement
+-- (source row) is ignored.
+withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
+ -> GenParser Char st (a, Int) -- ^ (result, displacement)
+withHorizDisplacement parser = do
+ pos1 <- getPosition
+ result <- parser
+ pos2 <- getPosition
+ return (result, sourceColumn pos2 - sourceColumn pos1)
+
+-- | Parses a character and returns 'Null' (so that the parser can move on
+-- if it gets stuck).
+nullBlock :: GenParser Char st Block
+nullBlock = anyChar >> return Null
+
+-- | Fail if reader is in strict markdown syntax mode.
+failIfStrict :: GenParser Char ParserState ()
+failIfStrict = do
+ state <- getState
+ if stateStrict state then fail "strict mode" else return ()
+
+-- | Fail unless we're in literate haskell mode.
+failUnlessLHS :: GenParser tok ParserState ()
+failUnlessLHS = do
+ state <- getState
+ if stateLiterateHaskell state then return () else fail "Literate haskell feature"
+
+-- | Parses backslash, then applies character parser.
+escaped :: GenParser Char st Char -- ^ Parser for character to escape
+ -> GenParser Char st Inline
+escaped parser = try $ do
+ char '\\'
+ result <- parser
+ return (Str [result])
+
+-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
+upperRoman :: GenParser Char st (ListNumberStyle, Int)
+upperRoman = do
+ num <- romanNumeral True
+ return (UpperRoman, num)
+
+-- | Parses a lowercase roman numeral and returns (LowerRoman, number).
+lowerRoman :: GenParser Char st (ListNumberStyle, Int)
+lowerRoman = do
+ num <- romanNumeral False
+ return (LowerRoman, num)
+
+-- | Parses a decimal numeral and returns (Decimal, number).
+decimal :: GenParser Char st (ListNumberStyle, Int)
+decimal = do
+ num <- many1 digit
+ return (Decimal, read num)
+
+-- | Parses a '@' and optional label and
+-- returns (DefaultStyle, [next example number]). The next
+-- example number is incremented in parser state, and the label
+-- (if present) is added to the label table.
+exampleNum :: GenParser Char ParserState (ListNumberStyle, Int)
+exampleNum = do
+ char '@'
+ lab <- many (alphaNum <|> oneOf "_-")
+ st <- getState
+ let num = stateNextExample st
+ let newlabels = if null lab
+ then stateExamples st
+ else M.insert lab num $ stateExamples st
+ updateState $ \s -> s{ stateNextExample = num + 1
+ , stateExamples = newlabels }
+ return (Example, num)
+
+-- | Parses a '#' returns (DefaultStyle, 1).
+defaultNum :: GenParser Char st (ListNumberStyle, Int)
+defaultNum = do
+ char '#'
+ return (DefaultStyle, 1)
+
+-- | Parses a lowercase letter and returns (LowerAlpha, number).
+lowerAlpha :: GenParser Char st (ListNumberStyle, Int)
+lowerAlpha = do
+ ch <- oneOf ['a'..'z']
+ return (LowerAlpha, ord ch - ord 'a' + 1)
+
+-- | Parses an uppercase letter and returns (UpperAlpha, number).
+upperAlpha :: GenParser Char st (ListNumberStyle, Int)
+upperAlpha = do
+ ch <- oneOf ['A'..'Z']
+ return (UpperAlpha, ord ch - ord 'A' + 1)
+
+-- | Parses a roman numeral i or I
+romanOne :: GenParser Char st (ListNumberStyle, Int)
+romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
+ (char 'I' >> return (UpperRoman, 1))
+
+-- | Parses an ordered list marker and returns list attributes.
+anyOrderedListMarker :: GenParser Char ParserState ListAttributes
+anyOrderedListMarker = choice $
+ [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
+ numParser <- [decimal, exampleNum, defaultNum, romanOne,
+ lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
+
+-- | Parses a list number (num) followed by a period, returns list attributes.
+inPeriod :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inPeriod num = try $ do
+ (style, start) <- num
+ char '.'
+ let delim = if style == DefaultStyle
+ then DefaultDelim
+ else Period
+ return (start, style, delim)
+
+-- | Parses a list number (num) followed by a paren, returns list attributes.
+inOneParen :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inOneParen num = try $ do
+ (style, start) <- num
+ char ')'
+ return (start, style, OneParen)
+
+-- | Parses a list number (num) enclosed in parens, returns list attributes.
+inTwoParens :: GenParser Char st (ListNumberStyle, Int)
+ -> GenParser Char st ListAttributes
+inTwoParens num = try $ do
+ char '('
+ (style, start) <- num
+ char ')'
+ return (start, style, TwoParens)
+
+-- | Parses an ordered list marker with a given style and delimiter,
+-- returns number.
+orderedListMarker :: ListNumberStyle
+ -> ListNumberDelim
+ -> GenParser Char ParserState Int
+orderedListMarker style delim = do
+ let num = defaultNum <|> -- # can continue any kind of list
+ case style of
+ DefaultStyle -> decimal
+ Example -> exampleNum
+ Decimal -> decimal
+ UpperRoman -> upperRoman
+ LowerRoman -> lowerRoman
+ UpperAlpha -> upperAlpha
+ LowerAlpha -> lowerAlpha
+ let context = case delim of
+ DefaultDelim -> inPeriod
+ Period -> inPeriod
+ OneParen -> inOneParen
+ TwoParens -> inTwoParens
+ (start, _, _) <- context num
+ return start
+
+-- | Parses a character reference and returns a Str element.
+charRef :: GenParser Char st Inline
+charRef = do
+ c <- characterReference
+ return $ Str [c]
+
+-- | Parse a table using 'headerParser', 'rowParser',
+-- 'lineParser', and 'footerParser'.
+tableWith :: GenParser Char ParserState ([[Block]], [Alignment], [Int])
+ -> ([Int] -> GenParser Char ParserState [[Block]])
+ -> GenParser Char ParserState sep
+ -> GenParser Char ParserState end
+ -> GenParser Char ParserState [Inline]
+ -> GenParser Char ParserState Block
+tableWith headerParser rowParser lineParser footerParser captionParser = try $ do
+ caption' <- option [] captionParser
+ (heads, aligns, indices) <- headerParser
+ lines' <- rowParser indices `sepEndBy` lineParser
+ footerParser
+ caption <- if null caption'
+ then option [] captionParser
+ else return caption'
+ state <- getState
+ let numColumns = stateColumns state
+ let widths = widthsFromIndices numColumns indices
+ return $ Table caption aligns widths heads lines'
+
+-- Calculate relative widths of table columns, based on indices
+widthsFromIndices :: Int -- Number of columns on terminal
+ -> [Int] -- Indices
+ -> [Double] -- Fractional relative sizes of columns
+widthsFromIndices _ [] = []
+widthsFromIndices numColumns indices =
+ let lengths' = zipWith (-) indices (0:indices)
+ lengths = reverse $
+ case reverse lengths' of
+ [] -> []
+ [x] -> [x]
+ -- compensate for the fact that intercolumn
+ -- spaces are counted in widths of all columns
+ -- but the last...
+ (x:y:zs) -> if x < y && y - x <= 2
+ then y:y:zs
+ else x:y:zs
+ totLength = sum lengths
+ quotient = if totLength > numColumns
+ then fromIntegral totLength
+ else fromIntegral numColumns
+ fracs = map (\l -> (fromIntegral l) / quotient) lengths in
+ tail fracs
+
+-- Parse a grid table: starts with row of '-' on top, then header
+-- (which may be grid), then the rows,
+-- which may be grid, separated by blank lines, and
+-- ending with a footer (dashed line followed by blank line).
+gridTableWith :: GenParser Char ParserState Block -- ^ Block parser
+ -> GenParser Char ParserState [Inline] -- ^ Caption parser
+ -> Bool -- ^ Headerless table
+ -> GenParser Char ParserState Block
+gridTableWith block tableCaption headless =
+ tableWith (gridTableHeader headless block) (gridTableRow block) (gridTableSep '-') gridTableFooter tableCaption
+
+gridTableSplitLine :: [Int] -> String -> [String]
+gridTableSplitLine indices line =
+ map removeFinalBar $ tail $ splitByIndices (init indices) line
+
+gridPart :: Char -> GenParser Char st (Int, Int)
+gridPart ch = do
+ dashes <- many1 (char ch)
+ char '+'
+ return (length dashes, length dashes + 1)
+
+gridDashedLines :: Char -> GenParser Char st [(Int,Int)]
+gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
+
+removeFinalBar :: String -> String
+removeFinalBar = reverse . dropWhile (=='|') . dropWhile (`elem` " \t") .
+ reverse
+
+-- | Separator between rows of grid table.
+gridTableSep :: Char -> GenParser Char ParserState Char
+gridTableSep ch = try $ gridDashedLines ch >> return '\n'
+
+-- | Parse header for a grid table.
+gridTableHeader :: Bool -- ^ Headerless table
+ -> GenParser Char ParserState Block
+ -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
+gridTableHeader headless block = try $ do
+ optional blanklines
+ dashes <- gridDashedLines '-'
+ rawContent <- if headless
+ then return $ repeat ""
+ else many1
+ (notFollowedBy (gridTableSep '=') >> char '|' >>
+ many1Till anyChar newline)
+ if headless
+ then return ()
+ else gridTableSep '=' >> return ()
+ let lines' = map snd dashes
+ let indices = scanl (+) 0 lines'
+ let aligns = replicate (length lines') AlignDefault
+ -- RST does not have a notion of alignments
+ let rawHeads = if headless
+ then replicate (length dashes) ""
+ else map (intercalate " ") $ transpose
+ $ map (gridTableSplitLine indices) rawContent
+ heads <- mapM (parseFromString $ many block) $
+ map removeLeadingTrailingSpace rawHeads
+ return (heads, aligns, indices)
+
+gridTableRawLine :: [Int] -> GenParser Char ParserState [String]
+gridTableRawLine indices = do
+ char '|'
+ line <- many1Till anyChar newline
+ return (gridTableSplitLine indices $ removeTrailingSpace line)
+
+-- | Parse row of grid table.
+gridTableRow :: GenParser Char ParserState Block
+ -> [Int]
+ -> GenParser Char ParserState [[Block]]
+gridTableRow block indices = do
+ colLines <- many1 (gridTableRawLine indices)
+ let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
+ transpose colLines
+ mapM (liftM compactifyCell . parseFromString (many block)) cols
+
+removeOneLeadingSpace :: [String] -> [String]
+removeOneLeadingSpace xs =
+ if all startsWithSpace xs
+ then map (drop 1) xs
+ else xs
+ where startsWithSpace "" = True
+ startsWithSpace (y:_) = y == ' '
+
+compactifyCell :: [Block] -> [Block]
+compactifyCell bs = head $ compactify [bs]
+
+-- | Parse footer for a grid table.
+gridTableFooter :: GenParser Char ParserState [Char]
+gridTableFooter = blanklines
+
+---
+
+-- | Parse a string with a given parser and state.
+readWith :: GenParser Char ParserState a -- ^ parser
+ -> ParserState -- ^ initial state
+ -> String -- ^ input string
+ -> a
+readWith parser state input =
+ case runParser parser state "source" input of
+ Left err -> error $ "\nError:\n" ++ show err
+ Right result -> result
+
+-- | Parse a string with @parser@ (for testing).
+testStringWith :: (Show a) => GenParser Char ParserState a
+ -> String
+ -> IO ()
+testStringWith parser str = UTF8.putStrLn $ show $
+ readWith parser defaultParserState str
+
+-- | Parsing options.
+data ParserState = ParserState
+ { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
+ stateParserContext :: ParserContext, -- ^ Inside list?
+ stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
+ stateSanitizeHTML :: Bool, -- ^ Sanitize HTML?
+ stateKeys :: KeyTable, -- ^ List of reference keys
+#ifdef _CITEPROC
+ stateCitations :: [String], -- ^ List of available citations
+#endif
+ stateNotes :: NoteTable, -- ^ List of notes
+ stateTabStop :: Int, -- ^ Tab stop
+ stateStandalone :: Bool, -- ^ Parse bibliographic info?
+ stateTitle :: [Inline], -- ^ Title of document
+ stateAuthors :: [[Inline]], -- ^ Authors of document
+ stateDate :: [Inline], -- ^ Date of document
+ stateStrict :: Bool, -- ^ Use strict markdown syntax?
+ stateSmart :: Bool, -- ^ Use smart typography?
+ stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell
+ stateColumns :: Int, -- ^ Number of columns in terminal
+ stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
+ stateIndentedCodeClasses :: [String], -- ^ Classes to use for indented code blocks
+ stateNextExample :: Int, -- ^ Number of next example
+ stateExamples :: M.Map String Int -- ^ Map from example labels to numbers
+ }
+ deriving Show
+
+defaultParserState :: ParserState
+defaultParserState =
+ ParserState { stateParseRaw = False,
+ stateParserContext = NullState,
+ stateQuoteContext = NoQuote,
+ stateSanitizeHTML = False,
+ stateKeys = M.empty,
+#ifdef _CITEPROC
+ stateCitations = [],
+#endif
+ stateNotes = [],
+ stateTabStop = 4,
+ stateStandalone = False,
+ stateTitle = [],
+ stateAuthors = [],
+ stateDate = [],
+ stateStrict = False,
+ stateSmart = False,
+ stateLiterateHaskell = False,
+ stateColumns = 80,
+ stateHeaderTable = [],
+ stateIndentedCodeClasses = [],
+ stateNextExample = 1,
+ stateExamples = M.empty }
+
+data HeaderType
+ = SingleHeader Char -- ^ Single line of characters underneath
+ | DoubleHeader Char -- ^ Lines of characters above and below
+ deriving (Eq, Show)
+
+data ParserContext
+ = ListItemState -- ^ Used when running parser on list item contents
+ | NullState -- ^ Default state
+ deriving (Eq, Show)
+
+data QuoteContext
+ = InSingleQuote -- ^ Used when parsing inside single quotes
+ | InDoubleQuote -- ^ Used when parsing inside double quotes
+ | NoQuote -- ^ Used when not parsing inside quotes
+ deriving (Eq, Show)
+
+type NoteTable = [(String, String)]
+
+newtype Key = Key [Inline] deriving (Show, Read)
+
+instance Eq Key where
+ Key a == Key b = refsMatch a b
+
+instance Ord Key where
+ compare (Key a) (Key b) = if a == b then EQ else compare a b
+
+type KeyTable = M.Map Key Target
+
+-- | Look up key in key table and return target object.
+lookupKeySrc :: KeyTable -- ^ Key table
+ -> Key -- ^ Key
+ -> Maybe Target
+lookupKeySrc table key = case M.lookup key table of
+ Nothing -> Nothing
+ Just src -> Just src
+
+-- | Returns @True@ if keys match (case insensitive).
+refsMatch :: [Inline] -> [Inline] -> Bool
+refsMatch ((Str x):restx) ((Str y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((Emph x):restx) ((Emph y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Strong x):restx) ((Strong y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Strikeout x):restx) ((Strikeout y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Superscript x):restx) ((Superscript y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Subscript x):restx) ((Subscript y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((SmallCaps x):restx) ((SmallCaps y):resty) =
+ refsMatch x y && refsMatch restx resty
+refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =
+ t == u && refsMatch x y && refsMatch restx resty
+refsMatch ((Code x):restx) ((Code y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((Math t x):restx) ((Math u y):resty) =
+ ((map toLower x) == (map toLower y)) && t == u && refsMatch restx resty
+refsMatch ((TeX x):restx) ((TeX y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
+ ((map toLower x) == (map toLower y)) && refsMatch restx resty
+refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
+refsMatch [] x = null x
+refsMatch x [] = null x
+
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 5c188e3d9..6d54e7349 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -44,7 +44,8 @@ module Text.Pandoc.Readers.HTML (
import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Data.Maybe ( fromMaybe )
import Data.List ( isPrefixOf, isSuffixOf, intercalate )
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 36940fab0..bbc5bb872 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -35,7 +35,8 @@ module Text.Pandoc.Readers.LaTeX (
import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Data.Maybe ( fromMaybe )
import Data.Char ( chr )
import Data.List ( isPrefixOf, isSuffixOf )
@@ -453,7 +454,7 @@ inline = choice [ str
, accentedChar
, nonbreakingSpace
, specialChar
- , rawLaTeXInline
+ , rawLaTeXInline'
, escapedChar
, unescapedChar
] <?> "inline"
@@ -771,11 +772,16 @@ footnote = try $ do
setInput rest
return $ Note blocks
+-- | Parse any LaTeX inline command and return it in a raw TeX inline element.
+rawLaTeXInline' :: GenParser Char ParserState Inline
+rawLaTeXInline' = do
+ notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore",
+ "\\section"]
+ rawLaTeXInline
+
-- | Parse any LaTeX command and return it in a raw TeX inline element.
rawLaTeXInline :: GenParser Char ParserState Inline
rawLaTeXInline = try $ do
- notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore",
- "\\section"]
state <- getState
if stateParseRaw state
then do
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 0d3b30d10..086f85bb4 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -32,12 +32,13 @@ module Text.Pandoc.Readers.Markdown (
) where
import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate )
+import qualified Data.Map as M
import Data.Ord ( comparing )
import Data.Char ( isAlphaNum )
import Data.Maybe
-import qualified Data.Map as M
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )
import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
anyHtmlInlineTag, anyHtmlTag,
@@ -68,7 +69,7 @@ setextHChars = "=-"
-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
-specialChars = "\\[]*_~`<>$!^-.&@'\"\8216\8217\8220\8221;"
+specialChars = "\\[]*_~`<>$!^-.&@'\";"
--
-- auxiliary functions
@@ -203,10 +204,10 @@ referenceKey = try $ do
tit <- option "" referenceTitle
blanklines
endPos <- getPosition
- let newkey = (lab, (escapeURI $ removeTrailingSpace src, tit))
+ let target = (escapeURI $ removeTrailingSpace src, tit)
st <- getState
let oldkeys = stateKeys st
- updateState $ \s -> s { stateKeys = newkey : oldkeys }
+ updateState $ \s -> s { stateKeys = M.insert (Key lab) target oldkeys }
-- return blanks so line count isn't affected
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
@@ -716,7 +717,7 @@ dashedLine ch = do
-- Parse a table header with dashed lines of '-' preceded by
-- one (or zero) line of text.
simpleTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([[Char]], [Alignment], [Int])
+ -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
simpleTableHeader headless = try $ do
rawContent <- if headless
then return ""
@@ -735,7 +736,9 @@ simpleTableHeader headless = try $ do
let rawHeads' = if headless
then replicate (length dashes) ""
else rawHeads
- return (rawHeads', aligns, indices)
+ heads <- mapM (parseFromString (many plain)) $
+ map removeLeadingTrailingSpace rawHeads'
+ return (heads, aligns, indices)
-- Parse a table footer - dashed lines followed by blank line.
tableFooter :: GenParser Char ParserState [Char]
@@ -764,65 +767,27 @@ multilineRow :: [Int]
-> GenParser Char ParserState [[Block]]
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
- optional blanklines
let cols = map unlines $ transpose colLines
mapM (parseFromString (many plain)) cols
--- Calculate relative widths of table columns, based on indices
-widthsFromIndices :: Int -- Number of columns on terminal
- -> [Int] -- Indices
- -> [Double] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns indices =
- let lengths' = zipWith (-) indices (0:indices)
- lengths = reverse $
- case reverse lengths' of
- [] -> []
- [x] -> [x]
- -- compensate for the fact that intercolumn
- -- spaces are counted in widths of all columns
- -- but the last...
- (x:y:zs) -> if x < y && y - x <= 2
- then y:y:zs
- else x:y:zs
- totLength = sum lengths
- quotient = if totLength > numColumns
- then fromIntegral totLength
- else fromIntegral numColumns
- fracs = map (\l -> (fromIntegral l) / quotient) lengths in
- tail fracs
-
-- Parses a table caption: inlines beginning with 'Table:'
-- and followed by blank lines.
tableCaption :: GenParser Char ParserState [Inline]
tableCaption = try $ do
skipNonindentSpaces
- string "Table:"
+ string ":" <|> string "Table:"
result <- many1 inline
blanklines
return $ normalizeSpaces result
--- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
-tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
- -> ([Int] -> GenParser Char ParserState [[Block]])
- -> GenParser Char ParserState end
- -> GenParser Char ParserState Block
-tableWith headerParser lineParser footerParser = try $ do
- (rawHeads, aligns, indices) <- headerParser
- lines' <- many1Till (lineParser indices) footerParser
- caption <- option [] tableCaption
- heads <- mapM (parseFromString (many plain)) rawHeads
- state <- getState
- let numColumns = stateColumns state
- let widths = widthsFromIndices numColumns indices
- return $ Table caption aligns widths heads lines'
-
-- Parse a simple table with '---' header and one line per row.
simpleTable :: Bool -- ^ Headerless table
-> GenParser Char ParserState Block
simpleTable headless = do
Table c a _w h l <- tableWith (simpleTableHeader headless) tableLine
- (if headless then tableFooter else tableFooter <|> blanklines)
+ (return ())
+ (if headless then tableFooter else tableFooter <|> blanklines)
+ tableCaption
-- Simple tables get 0s for relative column widths (i.e., use default)
return $ Table c a (replicate (length a) 0) h l
@@ -833,10 +798,10 @@ simpleTable headless = do
multilineTable :: Bool -- ^ Headerless table
-> GenParser Char ParserState Block
multilineTable headless =
- tableWith (multilineTableHeader headless) multilineRow tableFooter
+ tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter tableCaption
multilineTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([String], [Alignment], [Int])
+ -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
multilineTableHeader headless = try $ do
if headless
then return '\n'
@@ -860,7 +825,9 @@ multilineTableHeader headless = try $ do
let rawHeads = if headless
then replicate (length dashes) ""
else map (intercalate " ") rawHeadsList
- return ((map removeLeadingTrailingSpace rawHeads), aligns, indices)
+ heads <- mapM (parseFromString (many plain)) $
+ map removeLeadingTrailingSpace rawHeads
+ return (heads, aligns, indices)
-- Returns an alignment type for a table, based on a list of strings
-- (the rows of the column header) and a number (the length of the
@@ -880,9 +847,14 @@ alignType strLst len =
(True, True) -> AlignCenter
(False, False) -> AlignDefault
+gridTable :: Bool -- ^ Headerless table
+ -> GenParser Char ParserState Block
+gridTable = gridTableWith block tableCaption
+
table :: GenParser Char ParserState Block
table = multilineTable False <|> simpleTable True <|>
- simpleTable False <|> multilineTable True <?> "table"
+ simpleTable False <|> multilineTable True <|>
+ gridTable False <|> gridTable True <?> "table"
--
-- inline
@@ -1081,30 +1053,28 @@ failIfInQuoteContext context = do
singleQuoteStart :: GenParser Char ParserState Char
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
- char '\8216' <|>
- (try $ do char '\''
- notFollowedBy (oneOf ")!],.;:-? \t\n")
- notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
- satisfy (not . isAlphaNum)))
- -- possess/contraction
- return '\'')
+ try $ do char '\''
+ notFollowedBy (oneOf ")!],.;:-? \t\n")
+ notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
+ satisfy (not . isAlphaNum)))
+ -- possess/contraction
+ return '\''
singleQuoteEnd :: GenParser Char st Char
singleQuoteEnd = try $ do
- char '\8217' <|> char '\''
+ char '\''
notFollowedBy alphaNum
return '\''
doubleQuoteStart :: GenParser Char ParserState Char
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
- char '\8220' <|>
- (try $ do char '"'
- notFollowedBy (oneOf " \t\n")
- return '"')
+ try $ do char '"'
+ notFollowedBy (oneOf " \t\n")
+ return '"'
doubleQuoteEnd :: GenParser Char st Char
-doubleQuoteEnd = char '\8221' <|> char '"'
+doubleQuoteEnd = char '"'
ellipses :: GenParser Char st Inline
ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
@@ -1229,7 +1199,7 @@ referenceLink lab = do
optional (newline >> skipSpaces) >> reference))
let ref' = if null ref then lab else ref
state <- getState
- case lookupKeySrc (stateKeys state) ref' of
+ case lookupKeySrc (stateKeys state) (Key ref') of
Nothing -> fail "no corresponding key"
Just target -> return target
@@ -1314,7 +1284,7 @@ inlineCitation = try $ do
chkCit :: Target -> GenParser Char ParserState (Maybe Target)
chkCit t = do
st <- getState
- case lookupKeySrc (stateKeys st) [Str $ fst t] of
+ case lookupKeySrc (stateKeys st) (Key [Str $ fst t]) of
Just _ -> fail "This is a link"
Nothing -> if elem (fst t) $ stateCitations st
then return $ Just t
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index c293c4fcd..13afe5053 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -31,10 +31,13 @@ module Text.Pandoc.Readers.RST (
readRST
) where
import Text.Pandoc.Definition
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Text.ParserCombinators.Parsec
-import Control.Monad ( when, unless, liftM )
-import Data.List ( findIndex, delete, intercalate, transpose )
+import Control.Monad ( when, unless )
+import Data.List ( findIndex, intercalate, transpose, sort )
+import qualified Data.Map as M
+import Text.Printf ( printf )
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ParserState -- ^ Parser state, including options for parser
@@ -93,9 +96,6 @@ parseRST = do
docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
setInput docMinusKeys
setPosition startPos
- st <- getState
- let reversedKeys = stateKeys st
- updateState $ \s -> s { stateKeys = reverse reversedKeys }
-- now parse it for real...
blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
@@ -540,10 +540,10 @@ referenceName = quotedReferenceName <|>
referenceKey :: GenParser Char ParserState [Char]
referenceKey = do
startPos <- getPosition
- key <- choice [imageKey, anonymousKey, regularKey]
+ (key, target) <- choice [imageKey, anonymousKey, regularKey]
st <- getState
let oldkeys = stateKeys st
- updateState $ \s -> s { stateKeys = key : oldkeys }
+ updateState $ \s -> s { stateKeys = M.insert key target oldkeys }
optional blanklines
endPos <- getPosition
-- return enough blanks to replace key
@@ -558,28 +558,29 @@ targetURI = do
blanklines
return $ escapeURI $ removeLeadingTrailingSpace $ contents
-imageKey :: GenParser Char ParserState ([Inline], (String, [Char]))
+imageKey :: GenParser Char ParserState (Key, Target)
imageKey = try $ do
string ".. |"
ref <- manyTill inline (char '|')
skipSpaces
string "image::"
src <- targetURI
- return (normalizeSpaces ref, (src, ""))
+ return (Key (normalizeSpaces ref), (src, ""))
-anonymousKey :: GenParser Char st ([Inline], (String, [Char]))
+anonymousKey :: GenParser Char st (Key, Target)
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
- return ([Str "_"], (src, ""))
+ pos <- getPosition
+ return (Key [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, ""))
-regularKey :: GenParser Char ParserState ([Inline], (String, [Char]))
+regularKey :: GenParser Char ParserState (Key, Target)
regularKey = try $ do
string ".. _"
ref <- referenceName
char ':'
src <- targetURI
- return (normalizeSpaces ref, (src, ""))
+ return (Key (normalizeSpaces ref), (src, ""))
--
-- tables
@@ -607,41 +608,20 @@ dashedLine ch = do
simpleDashedLines :: Char -> GenParser Char st [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
-gridPart :: Char -> GenParser Char st (Int, Int)
-gridPart ch = do
- dashes <- many1 (char ch)
- char '+'
- return (length dashes, length dashes + 1)
-
-gridDashedLines :: Char -> GenParser Char st [(Int,Int)]
-gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
-
-- Parse a table row separator
simpleTableSep :: Char -> GenParser Char ParserState Char
simpleTableSep ch = try $ simpleDashedLines ch >> newline
-gridTableSep :: Char -> GenParser Char ParserState Char
-gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-
-- Parse a table footer
simpleTableFooter :: GenParser Char ParserState [Char]
simpleTableFooter = try $ simpleTableSep '=' >> blanklines
-gridTableFooter :: GenParser Char ParserState [Char]
-gridTableFooter = blanklines
-
-- Parse a raw line and split it into chunks by indices.
simpleTableRawLine :: [Int] -> GenParser Char ParserState [String]
simpleTableRawLine indices = do
line <- many1Till anyChar newline
return (simpleTableSplitLine indices line)
-gridTableRawLine :: [Int] -> GenParser Char ParserState [String]
-gridTableRawLine indices = do
- char '|'
- line <- many1Till anyChar newline
- return (gridTableSplitLine indices $ removeTrailingSpace line)
-
-- Parse a table row and return a list of blocks (columns).
simpleTableRow :: [Int] -> GenParser Char ParserState [[Block]]
simpleTableRow indices = do
@@ -651,64 +631,13 @@ simpleTableRow indices = do
let cols = map unlines . transpose $ firstLine : colLines
mapM (parseFromString (many plain)) cols
-gridTableRow :: [Int]
- -> GenParser Char ParserState [[Block]]
-gridTableRow indices = do
- colLines <- many1 (gridTableRawLine indices)
- let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
- transpose colLines
- mapM (liftM compactifyCell . parseFromString (many block)) cols
-
-compactifyCell :: [Block] -> [Block]
-compactifyCell bs = head $ compactify [bs]
-
simpleTableSplitLine :: [Int] -> String -> [String]
simpleTableSplitLine indices line =
map removeLeadingTrailingSpace
$ tail $ splitByIndices (init indices) line
-gridTableSplitLine :: [Int] -> String -> [String]
-gridTableSplitLine indices line =
- map removeFinalBar $ tail $ splitByIndices (init indices) line
-
-removeFinalBar :: String -> String
-removeFinalBar = reverse . dropWhile (=='|') . dropWhile (`elem` " \t") .
- reverse
-
-removeOneLeadingSpace :: [String] -> [String]
-removeOneLeadingSpace xs =
- if all startsWithSpace xs
- then map (drop 1) xs
- else xs
- where startsWithSpace "" = True
- startsWithSpace (y:_) = y == ' '
-
--- Calculate relative widths of table columns, based on indices
-widthsFromIndices :: Int -- Number of columns on terminal
- -> [Int] -- Indices
- -> [Double] -- Fractional relative sizes of columns
-widthsFromIndices _ [] = []
-widthsFromIndices numColumns indices =
- let lengths' = zipWith (-) indices (0:indices)
- lengths = reverse $
- case reverse lengths' of
- [] -> []
- [x] -> [x]
- -- compensate for the fact that intercolumn
- -- spaces are counted in widths of all columns
- -- but the last...
- (x:y:zs) -> if x < y && y - x <= 2
- then y:y:zs
- else x:y:zs
- totLength = sum lengths
- quotient = if totLength > numColumns
- then fromIntegral totLength
- else fromIntegral numColumns
- fracs = map (\l -> (fromIntegral l) / quotient) lengths in
- tail fracs
-
simpleTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([[Char]], [Alignment], [Int])
+ -> GenParser Char ParserState ([[Block]], [Alignment], [Int])
simpleTableHeader headless = try $ do
optional blanklines
rawContent <- if headless
@@ -722,64 +651,23 @@ simpleTableHeader headless = try $ do
let rawHeads = if headless
then replicate (length dashes) ""
else simpleTableSplitLine indices rawContent
- return (rawHeads, aligns, indices)
+ heads <- mapM (parseFromString (many plain)) $
+ map removeLeadingTrailingSpace rawHeads
+ return (heads, aligns, indices)
-gridTableHeader :: Bool -- ^ Headerless table
- -> GenParser Char ParserState ([String], [Alignment], [Int])
-gridTableHeader headless = try $ do
- optional blanklines
- dashes <- gridDashedLines '-'
- rawContent <- if headless
- then return $ repeat ""
- else many1
- (notFollowedBy (gridTableSep '=') >> char '|' >> many1Till anyChar newline)
- if headless
- then return ()
- else gridTableSep '=' >> return ()
- let lines' = map snd dashes
- let indices = scanl (+) 0 lines'
- let aligns = replicate (length lines') AlignDefault -- RST does not have a notion of alignments
- let rawHeads = if headless
- then replicate (length dashes) ""
- else map (intercalate " ") $ transpose
- $ map (gridTableSplitLine indices) rawContent
- return (rawHeads, aligns, indices)
-
--- Parse a table using 'headerParser', 'lineParser', and 'footerParser'.
-tableWith :: GenParser Char ParserState ([[Char]], [Alignment], [Int])
- -> ([Int] -> GenParser Char ParserState [[Block]])
- -> GenParser Char ParserState sep
- -> GenParser Char ParserState end
- -> GenParser Char ParserState Block
-tableWith headerParser rowParser lineParser footerParser = try $ do
- (rawHeads, aligns, indices) <- headerParser
- lines' <- rowParser indices `sepEndBy` lineParser
- footerParser
- heads <- mapM (parseFromString (many plain)) rawHeads
- state <- getState
- let captions = [] -- no notion of captions in RST
- let numColumns = stateColumns state
- let widths = widthsFromIndices numColumns indices
- return $ Table captions aligns widths heads lines'
-
--- Parse a simple table with '---' header and one line per row.
+-- Parse a simple table.
simpleTable :: Bool -- ^ Headerless table
-> GenParser Char ParserState Block
simpleTable headless = do
- Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter
+ Table c a _w h l <- tableWith (simpleTableHeader headless) simpleTableRow sep simpleTableFooter (return [])
-- Simple tables get 0s for relative column widths (i.e., use default)
return $ Table c a (replicate (length a) 0) h l
where
sep = return () -- optional (simpleTableSep '-')
--- Parse a grid table: starts with row of '-' on top, then header
--- (which may be grid), then the rows,
--- which may be grid, separated by blank lines, and
--- ending with a footer (dashed line followed by blank line).
gridTable :: Bool -- ^ Headerless table
- -> GenParser Char ParserState Block
-gridTable headless =
- tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter
+ -> GenParser Char ParserState Block
+gridTable = gridTableWith block (return [])
table :: GenParser Char ParserState Block
table = gridTable False <|> simpleTable False <|>
@@ -889,17 +777,21 @@ explicitLink = try $ do
referenceLink :: GenParser Char ParserState Inline
referenceLink = try $ do
label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_'
- key <- option label' (do{char '_'; return [Str "_"]}) -- anonymous link
state <- getState
let keyTable = stateKeys state
+ let isAnonKey (Key [Str ('_':_)]) = True
+ isAnonKey _ = False
+ key <- option (Key label') $
+ do char '_'
+ let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
+ if null anonKeys
+ then pzero
+ else return (head anonKeys)
(src,tit) <- case lookupKeySrc keyTable key of
Nothing -> fail "no corresponding key"
Just target -> return target
- -- if anonymous link, remove first anon key so it won't be used again
- let keyTable' = if (key == [Str "_"]) -- anonymous link?
- then delete ([Str "_"], (src,tit)) keyTable -- remove first anon key
- else keyTable
- setState $ state { stateKeys = keyTable' }
+ -- if anonymous link, remove key so it won't be used again
+ when (isAnonKey key) $ updateState $ \s -> s{ stateKeys = M.delete key keyTable }
return $ Link (normalizeSpaces label') (src, tit)
autoURI :: GenParser Char ParserState Inline
@@ -922,7 +814,7 @@ image = try $ do
ref <- manyTill inline (char '|')
state <- getState
let keyTable = stateKeys state
- (src,tit) <- case lookupKeySrc keyTable ref of
+ (src,tit) <- case lookupKeySrc keyTable (Key ref) of
Nothing -> fail "no corresponding key"
Just target -> return target
return $ Image (normalizeSpaces ref) (src, tit)
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index 080354be1..40cf39987 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -28,208 +28,64 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of TeX math to a list of 'Pandoc' inline elements.
-}
module Text.Pandoc.Readers.TeXMath (
- readTeXMath
+ readTeXMath
) where
import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
+import Text.TeXMath.Parser
--- | Converts a string of raw TeX math to a list of 'Pandoc' inlines.
+-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
+-- Defaults to raw formula between @$@ characters if entire formula
+-- can't be converted.
readTeXMath :: String -- ^ String to parse (assumes @'\n'@ line endings)
-> [Inline]
-readTeXMath inp = case parse teXMath ("formula: " ++ inp) inp of
- Left _ -> [Str inp] -- if unparseable, just include original
- Right res -> res
-
-teXMath :: GenParser Char st [Inline]
-teXMath = manyTill mathPart eof >>= return . concat
-
-mathPart :: GenParser Char st [Inline]
-mathPart = whitespace <|> superscript <|> subscript <|> symbol <|>
- argument <|> digits <|> letters <|> misc
-
-whitespace :: GenParser Char st [Inline]
-whitespace = many1 space >> return []
-
-symbol :: GenParser Char st [Inline]
-symbol = try $ do
- char '\\'
- res <- many1 letter
- case lookup res teXsymbols of
- Just m -> return [Str m]
- Nothing -> return [Str $ "\\" ++ res]
-
-argument :: GenParser Char st [Inline]
-argument = try $ do
- char '{'
- res <- many mathPart
- char '}'
- return $ if null res
- then [Str " "]
- else [Str "{"] ++ concat res ++ [Str "}"]
-
-digits :: GenParser Char st [Inline]
-digits = do
- res <- many1 digit
- return [Str res]
-
-letters :: GenParser Char st [Inline]
-letters = do
- res <- many1 letter
- return [Emph [Str res]]
-
-misc :: GenParser Char st [Inline]
-misc = do
- res <- noneOf "}"
- return [Str [res]]
-
-scriptArg :: GenParser Char st [Inline]
-scriptArg = try $ do
- (try (do{char '{'; r <- many mathPart; char '}'; return $ concat r}))
- <|> symbol
- <|> (do{c <- (letter <|> digit); return [Str [c]]})
-
-superscript :: GenParser Char st [Inline]
-superscript = try $ do
- char '^'
- arg <- scriptArg
- return [Superscript arg]
-
-subscript :: GenParser Char st [Inline]
-subscript = try $ do
- char '_'
- arg <- scriptArg
- return [Subscript arg]
-
-withThinSpace :: String -> String
-withThinSpace str = "\x2009" ++ str ++ "\x2009"
-
-teXsymbols :: [(String, String)]
-teXsymbols =
- [("alpha","\x3B1")
- ,("beta", "\x3B2")
- ,("chi", "\x3C7")
- ,("delta", "\x3B4")
- ,("Delta", "\x394")
- ,("epsilon", "\x3B5")
- ,("varepsilon", "\x25B")
- ,("eta", "\x3B7")
- ,("gamma", "\x3B3")
- ,("Gamma", "\x393")
- ,("iota", "\x3B9")
- ,("kappa", "\x3BA")
- ,("lambda", "\x3BB")
- ,("Lambda", "\x39B")
- ,("mu", "\x3BC")
- ,("nu", "\x3BD")
- ,("omega", "\x3C9")
- ,("Omega", "\x3A9")
- ,("phi", "\x3C6")
- ,("varphi", "\x3D5")
- ,("Phi", "\x3A6")
- ,("pi", "\x3C0")
- ,("Pi", "\x3A0")
- ,("psi", "\x3C8")
- ,("Psi", "\x3A8")
- ,("rho", "\x3C1")
- ,("sigma", "\x3C3")
- ,("Sigma", "\x3A3")
- ,("tau", "\x3C4")
- ,("theta", "\x3B8")
- ,("vartheta", "\x3D1")
- ,("Theta", "\x398")
- ,("upsilon", "\x3C5")
- ,("xi", "\x3BE")
- ,("Xi", "\x39E")
- ,("zeta", "\x3B6")
- ,("ne", "\x2260")
- ,("lt", withThinSpace "<")
- ,("le", withThinSpace "\x2264")
- ,("leq", withThinSpace "\x2264")
- ,("ge", withThinSpace "\x2265")
- ,("geq", withThinSpace "\x2265")
- ,("prec", withThinSpace "\x227A")
- ,("succ", withThinSpace "\x227B")
- ,("preceq", withThinSpace "\x2AAF")
- ,("succeq", withThinSpace "\x2AB0")
- ,("in", withThinSpace "\x2208")
- ,("notin", withThinSpace "\x2209")
- ,("subset", withThinSpace "\x2282")
- ,("supset", withThinSpace "\x2283")
- ,("subseteq", withThinSpace "\x2286")
- ,("supseteq", withThinSpace "\x2287")
- ,("equiv", withThinSpace "\x2261")
- ,("cong", withThinSpace "\x2245")
- ,("approx", withThinSpace "\x2248")
- ,("propto", withThinSpace "\x221D")
- ,("cdot", withThinSpace "\x22C5")
- ,("star", withThinSpace "\x22C6")
- ,("backslash", "\\")
- ,("times", withThinSpace "\x00D7")
- ,("divide", withThinSpace "\x00F7")
- ,("circ", withThinSpace "\x2218")
- ,("oplus", withThinSpace "\x2295")
- ,("otimes", withThinSpace "\x2297")
- ,("odot", withThinSpace "\x2299")
- ,("sum", "\x2211")
- ,("prod", "\x220F")
- ,("wedge", withThinSpace "\x2227")
- ,("bigwedge", withThinSpace "\x22C0")
- ,("vee", withThinSpace "\x2228")
- ,("bigvee", withThinSpace "\x22C1")
- ,("cap", withThinSpace "\x2229")
- ,("bigcap", withThinSpace "\x22C2")
- ,("cup", withThinSpace "\x222A")
- ,("bigcup", withThinSpace "\x22C3")
- ,("neg", "\x00AC")
- ,("implies", withThinSpace "\x21D2")
- ,("iff", withThinSpace "\x21D4")
- ,("forall", "\x2200")
- ,("exists", "\x2203")
- ,("bot", "\x22A5")
- ,("top", "\x22A4")
- ,("vdash", "\x22A2")
- ,("models", withThinSpace "\x22A8")
- ,("uparrow", "\x2191")
- ,("downarrow", "\x2193")
- ,("rightarrow", withThinSpace "\x2192")
- ,("to", withThinSpace "\x2192")
- ,("rightarrowtail", "\x21A3")
- ,("twoheadrightarrow", withThinSpace "\x21A0")
- ,("twoheadrightarrowtail", withThinSpace "\x2916")
- ,("mapsto", withThinSpace "\x21A6")
- ,("leftarrow", withThinSpace "\x2190")
- ,("leftrightarrow", withThinSpace "\x2194")
- ,("Rightarrow", withThinSpace "\x21D2")
- ,("Leftarrow", withThinSpace "\x21D0")
- ,("Leftrightarrow", withThinSpace "\x21D4")
- ,("partial", "\x2202")
- ,("nabla", "\x2207")
- ,("pm", "\x00B1")
- ,("emptyset", "\x2205")
- ,("infty", "\x221E")
- ,("aleph", "\x2135")
- ,("ldots", "...")
- ,("therefore", "\x2234")
- ,("angle", "\x2220")
- ,("quad", "\x00A0\x00A0")
- ,("cdots", "\x22EF")
- ,("vdots", "\x22EE")
- ,("ddots", "\x22F1")
- ,("diamond", "\x22C4")
- ,("Box", "\x25A1")
- ,("lfloor", "\x230A")
- ,("rfloor", "\x230B")
- ,("lceiling", "\x2308")
- ,("rceiling", "\x2309")
- ,("langle", "\x2329")
- ,("rangle", "\x232A")
- ,("int", "\8747")
- ,("{", "{")
- ,("}", "}")
- ,("[", "[")
- ,("]", "]")
- ,("|", "|")
- ,("||", "||")
- ]
+readTeXMath inp = case readTeXMath' inp of
+ Nothing -> [Str ("$" ++ inp ++ "$")]
+ Just res -> res
+
+-- | Like 'readTeXMath', but without the default.
+readTeXMath' :: String -- ^ String to parse (assumes @'\n'@ line endings)
+ -> Maybe [Inline]
+readTeXMath' inp = case parse formula "formula" inp of
+ Left _ -> Just [Str inp]
+ Right exps -> expsToInlines exps
+
+expsToInlines :: [Exp] -> Maybe [Inline]
+expsToInlines xs = do
+ res <- mapM expToInlines xs
+ return (concat res)
+
+expToInlines :: Exp -> Maybe [Inline]
+expToInlines (ENumber s) = Just [Str s]
+expToInlines (EIdentifier s) = Just [Emph [Str s]]
+expToInlines (EMathOperator s) = Just [Str s]
+expToInlines (ESymbol t s) = Just $ addSpace t (Str s)
+ where addSpace Op x = [x, thinspace]
+ addSpace Bin x = [medspace, x, medspace]
+ addSpace Rel x = [widespace, x, widespace]
+ addSpace Pun x = [x, thinspace]
+ addSpace _ x = [x]
+ thinspace = Str "\x2006"
+ medspace = Str "\x2005"
+ widespace = Str "\x2004"
+expToInlines (EStretchy x) = expToInlines x
+expToInlines (EGrouped xs) = expsToInlines xs
+expToInlines (ESpace _) = Just [Str " "] -- variable widths not supported
+expToInlines (EBinary _ _ _) = Nothing
+expToInlines (ESub x y) = do
+ x' <- expToInlines x
+ y' <- expToInlines y
+ return $ x' ++ [Subscript y']
+expToInlines (ESuper x y) = do
+ x' <- expToInlines x
+ y' <- expToInlines y
+ return $ x' ++ [Superscript y']
+expToInlines (ESubsup x y z) = do
+ x' <- expToInlines x
+ y' <- expToInlines y
+ z' <- expToInlines z
+ return $ x' ++ [Subscript y'] ++ [Superscript z']
+expToInlines (EText _ x) = Just [Emph [Str x]]
+expToInlines _ = Nothing
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index d465142b3..025b54b93 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -45,54 +45,15 @@ module Text.Pandoc.Shared (
toRomanNumeral,
escapeURI,
unescapeURI,
+ tabFilter,
+ -- * Prettyprinting
wrapped,
wrapIfNeeded,
wrappedTeX,
wrapTeXIfNeeded,
BlockWrapper (..),
wrappedBlocksToDoc,
- tabFilter,
- -- * Parsing
- (>>~),
- anyLine,
- many1Till,
- notFollowedBy',
- oneOfStrings,
- spaceChar,
- skipSpaces,
- blankline,
- blanklines,
- enclosed,
- stringAnyCase,
- parseFromString,
- lineClump,
- charsInBalanced,
- charsInBalanced',
- romanNumeral,
- emailAddress,
- uri,
- withHorizDisplacement,
- nullBlock,
- failIfStrict,
- failUnlessLHS,
- escaped,
- anyOrderedListMarker,
- orderedListMarker,
- charRef,
- readWith,
- testStringWith,
- ParserState (..),
- defaultParserState,
- HeaderType (..),
- ParserContext (..),
- QuoteContext (..),
- NoteTable,
- KeyTable,
- lookupKeySrc,
- refsMatch,
- -- * Prettyprinting
hang',
- prettyPandoc,
-- * Pandoc block and inline list processing
orderedListMarkers,
normalizeSpaces,
@@ -101,6 +62,7 @@ module Text.Pandoc.Shared (
hierarchicalize,
uniqueIdent,
isHeaderBlock,
+ headerShift,
-- * Writer options
HTMLMathMethod (..),
ObfuscationMethod (..),
@@ -112,27 +74,18 @@ module Text.Pandoc.Shared (
) where
import Text.Pandoc.Definition
-import Text.ParserCombinators.Parsec
+import qualified Text.Pandoc.UTF8 as UTF8 (readFile)
import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest )
import qualified Text.PrettyPrint.HughesPJ as PP
-import Text.Pandoc.CharacterReferences ( characterReference )
-import Data.Char ( toLower, toUpper, ord, isLower, isUpper, isAlpha, isAscii,
- isPunctuation )
+import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii,
+ isLetter, isDigit )
import Data.List ( find, isPrefixOf, intercalate )
-import Network.URI ( parseURI, URI (..), isAllowedInURI, escapeURIString, unEscapeString )
+import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString )
import Codec.Binary.UTF8.String ( encodeString, decodeString )
import System.Directory
import System.FilePath ( (</>) )
--- Note: ghc >= 6.12 (base >=4.2) supports unicode through iconv
--- So we use System.IO.UTF8 only if we have an earlier version
-#if MIN_VERSION_base(4,2,0)
-#else
-import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents )
-import System.IO.UTF8
-#endif
-import Data.Generics
+import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
-import Control.Monad (join)
import Paths_pandoc (getDataFileName)
--
@@ -157,11 +110,11 @@ splitByIndices (x:xs) lst =
-- | Replace each occurrence of one sublist in a list with another.
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
substitute _ _ [] = []
-substitute [] _ lst = lst
-substitute target replacement lst =
+substitute [] _ xs = xs
+substitute target replacement lst@(x:xs) =
if target `isPrefixOf` lst
- then replacement ++ (substitute target replacement $ drop (length target) lst)
- else (head lst):(substitute target replacement $ tail lst)
+ then replacement ++ substitute target replacement (drop (length target) lst)
+ else x : substitute target replacement xs
--
-- Text processing
@@ -243,6 +196,30 @@ unescapeURI :: String -> String
unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) .
decodeString . unEscapeString
+-- | Convert tabs to spaces and filter out DOS line endings.
+-- Tabs will be preserved if tab stop is set to 0.
+tabFilter :: Int -- ^ Tab stop
+ -> String -- ^ Input
+ -> String
+tabFilter tabStop =
+ let go _ [] = ""
+ go _ ('\n':xs) = '\n' : go tabStop xs
+ go _ ('\r':'\n':xs) = '\n' : go tabStop xs
+ go _ ('\r':xs) = '\n' : go tabStop xs
+ go spsToNextStop ('\t':xs) =
+ if tabStop == 0
+ then '\t' : go tabStop xs
+ else replicate spsToNextStop ' ' ++ go tabStop xs
+ go 1 (x:xs) =
+ x : go tabStop xs
+ go spsToNextStop (x:xs) =
+ x : go (spsToNextStop - 1) xs
+ in go tabStop
+
+--
+-- Prettyprinting
+--
+
-- | Wrap inlines to line length.
wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc
wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>=
@@ -312,560 +289,10 @@ wrappedBlocksToDoc = foldr addBlock empty
addBlock (Pad d) accum = d $$ text "" $$ accum
addBlock (Reg d) accum = d $$ accum
--- | Convert tabs to spaces and filter out DOS line endings.
--- Tabs will be preserved if tab stop is set to 0.
-tabFilter :: Int -- ^ Tab stop
- -> String -- ^ Input
- -> String
-tabFilter tabStop =
- let go _ [] = ""
- go _ ('\n':xs) = '\n' : go tabStop xs
- go _ ('\r':'\n':xs) = '\n' : go tabStop xs
- go _ ('\r':xs) = '\n' : go tabStop xs
- go spsToNextStop ('\t':xs) =
- if tabStop == 0
- then '\t' : go tabStop xs
- else replicate spsToNextStop ' ' ++ go tabStop xs
- go 1 (x:xs) =
- x : go tabStop xs
- go spsToNextStop (x:xs) =
- x : go (spsToNextStop - 1) xs
- in go tabStop
-
---
--- Parsing
---
-
--- | Like >>, but returns the operation on the left.
--- (Suggested by Tillmann Rendel on Haskell-cafe list.)
-(>>~) :: (Monad m) => m a -> m b -> m a
-a >>~ b = a >>= \x -> b >> return x
-
--- | Parse any line of text
-anyLine :: GenParser Char st [Char]
-anyLine = manyTill anyChar newline
-
--- | Like @manyTill@, but reads at least one item.
-many1Till :: GenParser tok st a
- -> GenParser tok st end
- -> GenParser tok st [a]
-many1Till p end = do
- first <- p
- rest <- manyTill p end
- return (first:rest)
-
--- | A more general form of @notFollowedBy@. This one allows any
--- type of parser to be specified, and succeeds only if that parser fails.
--- It does not consume any input.
-notFollowedBy' :: Show b => GenParser a st b -> GenParser a st ()
-notFollowedBy' p = try $ join $ do a <- try p
- return (unexpected (show a))
- <|>
- return (return ())
--- (This version due to Andrew Pimlott on the Haskell mailing list.)
-
--- | Parses one of a list of strings (tried in order).
-oneOfStrings :: [String] -> GenParser Char st String
-oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
-
--- | Parses a space or tab.
-spaceChar :: CharParser st Char
-spaceChar = char ' ' <|> char '\t'
-
--- | Skips zero or more spaces or tabs.
-skipSpaces :: GenParser Char st ()
-skipSpaces = skipMany spaceChar
-
--- | Skips zero or more spaces or tabs, then reads a newline.
-blankline :: GenParser Char st Char
-blankline = try $ skipSpaces >> newline
-
--- | Parses one or more blank lines and returns a string of newlines.
-blanklines :: GenParser Char st [Char]
-blanklines = many1 blankline
-
--- | Parses material enclosed between start and end parsers.
-enclosed :: GenParser Char st t -- ^ start parser
- -> GenParser Char st end -- ^ end parser
- -> GenParser Char st a -- ^ content parser (to be used repeatedly)
- -> GenParser Char st [a]
-enclosed start end parser = try $
- start >> notFollowedBy space >> many1Till parser end
-
--- | Parse string, case insensitive.
-stringAnyCase :: [Char] -> CharParser st String
-stringAnyCase [] = string ""
-stringAnyCase (x:xs) = do
- firstChar <- char (toUpper x) <|> char (toLower x)
- rest <- stringAnyCase xs
- return (firstChar:rest)
-
--- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: GenParser tok st a -> [tok] -> GenParser tok st a
-parseFromString parser str = do
- oldPos <- getPosition
- oldInput <- getInput
- setInput str
- result <- parser
- setInput oldInput
- setPosition oldPos
- return result
-
--- | Parse raw line block up to and including blank lines.
-lineClump :: GenParser Char st String
-lineClump = blanklines
- <|> (many1 (notFollowedBy blankline >> anyLine) >>= return . unlines)
-
--- | Parse a string of characters between an open character
--- and a close character, including text between balanced
--- pairs of open and close, which must be different. For example,
--- @charsInBalanced '(' ')'@ will parse "(hello (there))"
--- and return "hello (there)". Stop if a blank line is
--- encountered.
-charsInBalanced :: Char -> Char -> GenParser Char st String
-charsInBalanced open close = try $ do
- char open
- raw <- many $ (many1 (noneOf [open, close, '\n']))
- <|> (do res <- charsInBalanced open close
- return $ [open] ++ res ++ [close])
- <|> try (string "\n" >>~ notFollowedBy' blanklines)
- char close
- return $ concat raw
-
--- | Like @charsInBalanced@, but allow blank lines in the content.
-charsInBalanced' :: Char -> Char -> GenParser Char st String
-charsInBalanced' open close = try $ do
- char open
- raw <- many $ (many1 (noneOf [open, close]))
- <|> (do res <- charsInBalanced' open close
- return $ [open] ++ res ++ [close])
- char close
- return $ concat raw
-
--- Auxiliary functions for romanNumeral:
-
-lowercaseRomanDigits :: [Char]
-lowercaseRomanDigits = ['i','v','x','l','c','d','m']
-
-uppercaseRomanDigits :: [Char]
-uppercaseRomanDigits = map toUpper lowercaseRomanDigits
-
--- | Parses a roman numeral (uppercase or lowercase), returns number.
-romanNumeral :: Bool -- ^ Uppercase if true
- -> GenParser Char st Int
-romanNumeral upperCase = do
- let romanDigits = if upperCase
- then uppercaseRomanDigits
- else lowercaseRomanDigits
- lookAhead $ oneOf romanDigits
- let [one, five, ten, fifty, hundred, fivehundred, thousand] =
- map char romanDigits
- thousands <- many thousand >>= (return . (1000 *) . length)
- ninehundreds <- option 0 $ try $ hundred >> thousand >> return 900
- fivehundreds <- many fivehundred >>= (return . (500 *) . length)
- fourhundreds <- option 0 $ try $ hundred >> fivehundred >> return 400
- hundreds <- many hundred >>= (return . (100 *) . length)
- nineties <- option 0 $ try $ ten >> hundred >> return 90
- fifties <- many fifty >>= (return . (50 *) . length)
- forties <- option 0 $ try $ ten >> fifty >> return 40
- tens <- many ten >>= (return . (10 *) . length)
- nines <- option 0 $ try $ one >> ten >> return 9
- fives <- many five >>= (return . (5 *) . length)
- fours <- option 0 $ try $ one >> five >> return 4
- ones <- many one >>= (return . length)
- let total = thousands + ninehundreds + fivehundreds + fourhundreds +
- hundreds + nineties + fifties + forties + tens + nines +
- fives + fours + ones
- if total == 0
- then fail "not a roman numeral"
- else return total
-
--- Parsers for email addresses and URIs
-
-emailChar :: GenParser Char st Char
-emailChar = alphaNum <|> oneOf "-+_."
-
-domainChar :: GenParser Char st Char
-domainChar = alphaNum <|> char '-'
-
-domain :: GenParser Char st [Char]
-domain = do
- first <- many1 domainChar
- dom <- many1 $ try (char '.' >> many1 domainChar )
- return $ intercalate "." (first:dom)
-
--- | Parses an email address; returns original and corresponding
--- escaped mailto: URI.
-emailAddress :: GenParser Char st (String, String)
-emailAddress = try $ do
- firstLetter <- alphaNum
- restAddr <- many emailChar
- let addr = firstLetter:restAddr
- char '@'
- dom <- domain
- let full = addr ++ '@':dom
- return (full, escapeURI $ "mailto:" ++ full)
-
--- | Parses a URI. Returns pair of original and URI-escaped version.
-uri :: GenParser Char st (String, String)
-uri = try $ do
- let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:",
- "news:", "telnet:" ]
- lookAhead $ oneOfStrings protocols
- -- scan non-ascii characters and ascii characters allowed in a URI
- str <- many1 $ satisfy (\c -> not (isAscii c) || isAllowedInURI c)
- -- now see if they amount to an absolute URI
- case parseURI (escapeURI str) of
- Just uri' -> if uriScheme uri' `elem` protocols
- then return (str, show uri')
- else fail "not a URI"
- Nothing -> fail "not a URI"
-
--- | Applies a parser, returns tuple of its results and its horizontal
--- displacement (the difference between the source column at the end
--- and the source column at the beginning). Vertical displacement
--- (source row) is ignored.
-withHorizDisplacement :: GenParser Char st a -- ^ Parser to apply
- -> GenParser Char st (a, Int) -- ^ (result, displacement)
-withHorizDisplacement parser = do
- pos1 <- getPosition
- result <- parser
- pos2 <- getPosition
- return (result, sourceColumn pos2 - sourceColumn pos1)
-
--- | Parses a character and returns 'Null' (so that the parser can move on
--- if it gets stuck).
-nullBlock :: GenParser Char st Block
-nullBlock = anyChar >> return Null
-
--- | Fail if reader is in strict markdown syntax mode.
-failIfStrict :: GenParser Char ParserState ()
-failIfStrict = do
- state <- getState
- if stateStrict state then fail "strict mode" else return ()
-
--- | Fail unless we're in literate haskell mode.
-failUnlessLHS :: GenParser tok ParserState ()
-failUnlessLHS = do
- state <- getState
- if stateLiterateHaskell state then return () else fail "Literate haskell feature"
-
--- | Parses backslash, then applies character parser.
-escaped :: GenParser Char st Char -- ^ Parser for character to escape
- -> GenParser Char st Inline
-escaped parser = try $ do
- char '\\'
- result <- parser
- return (Str [result])
-
--- | Parses an uppercase roman numeral and returns (UpperRoman, number).
-upperRoman :: GenParser Char st (ListNumberStyle, Int)
-upperRoman = do
- num <- romanNumeral True
- return (UpperRoman, num)
-
--- | Parses a lowercase roman numeral and returns (LowerRoman, number).
-lowerRoman :: GenParser Char st (ListNumberStyle, Int)
-lowerRoman = do
- num <- romanNumeral False
- return (LowerRoman, num)
-
--- | Parses a decimal numeral and returns (Decimal, number).
-decimal :: GenParser Char st (ListNumberStyle, Int)
-decimal = do
- num <- many1 digit
- return (Decimal, read num)
-
--- | Parses a '@' and optional label and
--- returns (DefaultStyle, [next example number]). The next
--- example number is incremented in parser state, and the label
--- (if present) is added to the label table.
-exampleNum :: GenParser Char ParserState (ListNumberStyle, Int)
-exampleNum = do
- char '@'
- lab <- many (alphaNum <|> oneOf "_-")
- st <- getState
- let num = stateNextExample st
- let newlabels = if null lab
- then stateExamples st
- else M.insert lab num $ stateExamples st
- updateState $ \s -> s{ stateNextExample = num + 1
- , stateExamples = newlabels }
- return (Example, num)
-
--- | Parses a '#' returns (DefaultStyle, 1).
-defaultNum :: GenParser Char st (ListNumberStyle, Int)
-defaultNum = do
- char '#'
- return (DefaultStyle, 1)
-
--- | Parses a lowercase letter and returns (LowerAlpha, number).
-lowerAlpha :: GenParser Char st (ListNumberStyle, Int)
-lowerAlpha = do
- ch <- oneOf ['a'..'z']
- return (LowerAlpha, ord ch - ord 'a' + 1)
-
--- | Parses an uppercase letter and returns (UpperAlpha, number).
-upperAlpha :: GenParser Char st (ListNumberStyle, Int)
-upperAlpha = do
- ch <- oneOf ['A'..'Z']
- return (UpperAlpha, ord ch - ord 'A' + 1)
-
--- | Parses a roman numeral i or I
-romanOne :: GenParser Char st (ListNumberStyle, Int)
-romanOne = (char 'i' >> return (LowerRoman, 1)) <|>
- (char 'I' >> return (UpperRoman, 1))
-
--- | Parses an ordered list marker and returns list attributes.
-anyOrderedListMarker :: GenParser Char ParserState ListAttributes
-anyOrderedListMarker = choice $
- [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens],
- numParser <- [decimal, exampleNum, defaultNum, romanOne,
- lowerAlpha, lowerRoman, upperAlpha, upperRoman]]
-
--- | Parses a list number (num) followed by a period, returns list attributes.
-inPeriod :: GenParser Char st (ListNumberStyle, Int)
- -> GenParser Char st ListAttributes
-inPeriod num = try $ do
- (style, start) <- num
- char '.'
- let delim = if style == DefaultStyle
- then DefaultDelim
- else Period
- return (start, style, delim)
-
--- | Parses a list number (num) followed by a paren, returns list attributes.
-inOneParen :: GenParser Char st (ListNumberStyle, Int)
- -> GenParser Char st ListAttributes
-inOneParen num = try $ do
- (style, start) <- num
- char ')'
- return (start, style, OneParen)
-
--- | Parses a list number (num) enclosed in parens, returns list attributes.
-inTwoParens :: GenParser Char st (ListNumberStyle, Int)
- -> GenParser Char st ListAttributes
-inTwoParens num = try $ do
- char '('
- (style, start) <- num
- char ')'
- return (start, style, TwoParens)
-
--- | Parses an ordered list marker with a given style and delimiter,
--- returns number.
-orderedListMarker :: ListNumberStyle
- -> ListNumberDelim
- -> GenParser Char ParserState Int
-orderedListMarker style delim = do
- let num = defaultNum <|> -- # can continue any kind of list
- case style of
- DefaultStyle -> decimal
- Example -> exampleNum
- Decimal -> decimal
- UpperRoman -> upperRoman
- LowerRoman -> lowerRoman
- UpperAlpha -> upperAlpha
- LowerAlpha -> lowerAlpha
- let context = case delim of
- DefaultDelim -> inPeriod
- Period -> inPeriod
- OneParen -> inOneParen
- TwoParens -> inTwoParens
- (start, _, _) <- context num
- return start
-
--- | Parses a character reference and returns a Str element.
-charRef :: GenParser Char st Inline
-charRef = do
- c <- characterReference
- return $ Str [c]
-
--- | Parse a string with a given parser and state.
-readWith :: GenParser Char ParserState a -- ^ parser
- -> ParserState -- ^ initial state
- -> String -- ^ input string
- -> a
-readWith parser state input =
- case runParser parser state "source" input of
- Left err -> error $ "\nError:\n" ++ show err
- Right result -> result
-
--- | Parse a string with @parser@ (for testing).
-testStringWith :: (Show a) => GenParser Char ParserState a
- -> String
- -> IO ()
-testStringWith parser str = putStrLn $ show $
- readWith parser defaultParserState str
-
--- | Parsing options.
-data ParserState = ParserState
- { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
- stateParserContext :: ParserContext, -- ^ Inside list?
- stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
- stateSanitizeHTML :: Bool, -- ^ Sanitize HTML?
- stateKeys :: KeyTable, -- ^ List of reference keys
-#ifdef _CITEPROC
- stateCitations :: [String], -- ^ List of available citations
-#endif
- stateNotes :: NoteTable, -- ^ List of notes
- stateTabStop :: Int, -- ^ Tab stop
- stateStandalone :: Bool, -- ^ Parse bibliographic info?
- stateTitle :: [Inline], -- ^ Title of document
- stateAuthors :: [[Inline]], -- ^ Authors of document
- stateDate :: [Inline], -- ^ Date of document
- stateStrict :: Bool, -- ^ Use strict markdown syntax?
- stateSmart :: Bool, -- ^ Use smart typography?
- stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell
- stateColumns :: Int, -- ^ Number of columns in terminal
- stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
- stateIndentedCodeClasses :: [String], -- ^ Classes to use for indented code blocks
- stateNextExample :: Int, -- ^ Number of next example
- stateExamples :: M.Map String Int -- ^ Map from example labels to numbers
- }
- deriving Show
-
-defaultParserState :: ParserState
-defaultParserState =
- ParserState { stateParseRaw = False,
- stateParserContext = NullState,
- stateQuoteContext = NoQuote,
- stateSanitizeHTML = False,
- stateKeys = [],
-#ifdef _CITEPROC
- stateCitations = [],
-#endif
- stateNotes = [],
- stateTabStop = 4,
- stateStandalone = False,
- stateTitle = [],
- stateAuthors = [],
- stateDate = [],
- stateStrict = False,
- stateSmart = False,
- stateLiterateHaskell = False,
- stateColumns = 80,
- stateHeaderTable = [],
- stateIndentedCodeClasses = [],
- stateNextExample = 1,
- stateExamples = M.empty }
-
-data HeaderType
- = SingleHeader Char -- ^ Single line of characters underneath
- | DoubleHeader Char -- ^ Lines of characters above and below
- deriving (Eq, Show)
-
-data ParserContext
- = ListItemState -- ^ Used when running parser on list item contents
- | NullState -- ^ Default state
- deriving (Eq, Show)
-
-data QuoteContext
- = InSingleQuote -- ^ Used when parsing inside single quotes
- | InDoubleQuote -- ^ Used when parsing inside double quotes
- | NoQuote -- ^ Used when not parsing inside quotes
- deriving (Eq, Show)
-
-type NoteTable = [(String, String)]
-
-type KeyTable = [([Inline], Target)]
-
--- | Look up key in key table and return target object.
-lookupKeySrc :: KeyTable -- ^ Key table
- -> [Inline] -- ^ Key
- -> Maybe Target
-lookupKeySrc table key = case find (refsMatch key . fst) table of
- Nothing -> Nothing
- Just (_, src) -> Just src
-
--- | Returns @True@ if keys match (case insensitive).
-refsMatch :: [Inline] -> [Inline] -> Bool
-refsMatch ((Str x):restx) ((Str y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((Emph x):restx) ((Emph y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Strong x):restx) ((Strong y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Strikeout x):restx) ((Strikeout y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Superscript x):restx) ((Superscript y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Subscript x):restx) ((Subscript y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((SmallCaps x):restx) ((SmallCaps y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =
- t == u && refsMatch x y && refsMatch restx resty
-refsMatch ((Code x):restx) ((Code y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((Math t x):restx) ((Math u y):resty) =
- ((map toLower x) == (map toLower y)) && t == u && refsMatch restx resty
-refsMatch ((TeX x):restx) ((TeX y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
-refsMatch [] x = null x
-refsMatch x [] = null x
-
---
--- Prettyprinting
---
-
-- | A version of hang that works like the version in pretty-1.0.0.0
hang' :: Doc -> Int -> Doc -> Doc
hang' d1 n d2 = d1 $$ (nest n d2)
--- | Indent string as a block.
-indentBy :: Int -- ^ Number of spaces to indent the block
- -> Int -- ^ Number of spaces (rel to block) to indent first line
- -> String -- ^ Contents of block to indent
- -> String
-indentBy _ _ [] = ""
-indentBy num first str =
- let (firstLine:restLines) = lines str
- firstLineIndent = num + first
- in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++
- (intercalate "\n" $ map ((replicate num ' ') ++ ) restLines)
-
--- | Prettyprint list of Pandoc blocks elements.
-prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks
- -> [Block] -- ^ List of blocks
- -> String
-prettyBlockList indent [] = indentBy indent 0 "[]"
-prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++
- (intercalate "\n, " (map prettyBlock blocks)) ++ " ]"
-
--- | Prettyprint Pandoc block element.
-prettyBlock :: Block -> String
-prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++
- (prettyBlockList 2 blocks)
-prettyBlock (OrderedList attribs blockLists) =
- "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++
- (intercalate ", " $ map (\blocks -> prettyBlockList 2 blocks)
- blockLists)) ++ " ]"
-prettyBlock (BulletList blockLists) = "BulletList\n" ++
- indentBy 2 0 ("[ " ++ (intercalate ", "
- (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
-prettyBlock (DefinitionList items) = "DefinitionList\n" ++
- indentBy 2 0 ("[ " ++ (intercalate "\n, "
- (map (\(term, defs) -> "(" ++ show term ++ ",\n" ++
- indentBy 3 0 ("[ " ++ (intercalate ", "
- (map (\blocks -> prettyBlockList 2 blocks) defs)) ++ "]") ++
- ")") items))) ++ " ]"
-prettyBlock (Table caption aligns widths header rows) =
- "Table " ++ show caption ++ " " ++ show aligns ++ " " ++
- show widths ++ "\n" ++ prettyRow header ++ " [\n" ++
- (intercalate ",\n" (map prettyRow rows)) ++ " ]"
- where prettyRow cols = indentBy 2 0 ("[ " ++ (intercalate ", "
- (map (\blocks -> prettyBlockList 2 blocks)
- cols))) ++ " ]"
-prettyBlock block = show block
-
--- | Prettyprint Pandoc document.
-prettyPandoc :: Pandoc -> String
-prettyPandoc (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++
- ")\n" ++ (prettyBlockList 0 blocks) ++ "\n"
-
--
-- Pandoc block and inline list processing
--
@@ -877,7 +304,7 @@ orderedListMarkers (start, numstyle, numdelim) =
let singleton c = [c]
nums = case numstyle of
DefaultStyle -> map show [start..]
- Example -> map show [start..]
+ Example -> map show [start..]
Decimal -> map show [start..]
UpperAlpha -> drop (start - 1) $ cycle $
map singleton ['A'..'Z']
@@ -937,38 +364,37 @@ data Element = Blk Block
-- lvl num ident label contents
deriving (Eq, Read, Show, Typeable, Data)
--- | Convert Pandoc inline list to plain text identifier.
+-- | Convert Pandoc inline list to plain text identifier. HTML
+-- identifiers must start with a letter, and may contain only
+-- letters, digits, and the characters _-.
inlineListToIdentifier :: [Inline] -> String
-inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier'
-
-inlineListToIdentifier' :: [Inline] -> [Char]
-inlineListToIdentifier' [] = ""
-inlineListToIdentifier' (x:xs) =
- xAsText ++ inlineListToIdentifier' xs
- where xAsText = case x of
- Str s -> filter (\c -> c `elem` "_-." || not (isPunctuation c)) $
- intercalate "-" $ words $ map toLower s
- Emph lst -> inlineListToIdentifier' lst
- Strikeout lst -> inlineListToIdentifier' lst
- Superscript lst -> inlineListToIdentifier' lst
- SmallCaps lst -> inlineListToIdentifier' lst
- Subscript lst -> inlineListToIdentifier' lst
- Strong lst -> inlineListToIdentifier' lst
- Quoted _ lst -> inlineListToIdentifier' lst
- Cite _ lst -> inlineListToIdentifier' lst
- Code s -> s
- Space -> "-"
- EmDash -> "-"
- EnDash -> "-"
- Apostrophe -> ""
- Ellipses -> ""
- LineBreak -> "-"
- Math _ _ -> ""
- TeX _ -> ""
- HtmlInline _ -> ""
- Link lst _ -> inlineListToIdentifier' lst
- Image lst _ -> inlineListToIdentifier' lst
- Note _ -> ""
+inlineListToIdentifier =
+ dropWhile (not . isAlpha) . intercalate "-" . words . map toLower .
+ filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
+ concatMap extractText
+ where extractText x = case x of
+ Str s -> s
+ Emph lst -> concatMap extractText lst
+ Strikeout lst -> concatMap extractText lst
+ Superscript lst -> concatMap extractText lst
+ SmallCaps lst -> concatMap extractText lst
+ Subscript lst -> concatMap extractText lst
+ Strong lst -> concatMap extractText lst
+ Quoted _ lst -> concatMap extractText lst
+ Cite _ lst -> concatMap extractText lst
+ Code s -> s
+ Space -> " "
+ EmDash -> "---"
+ EnDash -> "--"
+ Apostrophe -> ""
+ Ellipses -> "..."
+ LineBreak -> " "
+ Math _ s -> s
+ TeX _ -> ""
+ HtmlInline _ -> ""
+ Link lst _ -> concatMap extractText lst
+ Image lst _ -> concatMap extractText lst
+ Note _ -> ""
-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
hierarchicalize :: [Block] -> [Element]
@@ -1000,7 +426,9 @@ headerLtEq _ _ = False
-- Second argument is a list of already used identifiers.
uniqueIdent :: [Inline] -> [String] -> String
uniqueIdent title' usedIdents =
- let baseIdent = inlineListToIdentifier title'
+ let baseIdent = case inlineListToIdentifier title' of
+ "" -> "section"
+ x -> x
numIdent n = baseIdent ++ "-" ++ show n
in if baseIdent `elem` usedIdents
then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
@@ -1013,6 +441,13 @@ isHeaderBlock :: Block -> Bool
isHeaderBlock (Header _ _) = True
isHeaderBlock _ = False
+-- | Shift header levels up or down.
+headerShift :: Int -> Pandoc -> Pandoc
+headerShift n = processWith shift
+ where shift :: Block -> Block
+ shift (Header level inner) = Header (level + n) inner
+ shift x = x
+
--
-- Writer options
--
@@ -1036,8 +471,7 @@ data WriterOptions = WriterOptions
{ writerStandalone :: Bool -- ^ Include header and footer
, writerTemplate :: String -- ^ Template to use in standalone mode
, writerVariables :: [(String, String)] -- ^ Variables to set in template
- , writerIncludeBefore :: String -- ^ Text to include before the body
- , writerIncludeAfter :: String -- ^ Text to include after the body
+ , writerEPUBMetadata :: String -- ^ Metadata to include in EPUB
, writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
, writerTableOfContents :: Bool -- ^ Include table of contents
, writerS5 :: Bool -- ^ We're writing S5
@@ -1052,6 +486,8 @@ data WriterOptions = WriterOptions
, writerLiterateHaskell :: Bool -- ^ Write as literate haskell
, writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
, writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML
+ , writerSourceDirectory :: FilePath -- ^ Directory path of 1st source file
+ , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory
} deriving Show
-- | Default writer options.
@@ -1060,8 +496,7 @@ defaultWriterOptions =
WriterOptions { writerStandalone = False
, writerTemplate = ""
, writerVariables = []
- , writerIncludeBefore = ""
- , writerIncludeAfter = ""
+ , writerEPUBMetadata = ""
, writerTabStop = 4
, writerTableOfContents = False
, writerS5 = False
@@ -1076,6 +511,8 @@ defaultWriterOptions =
, writerLiterateHaskell = False
, writerEmailObfuscation = JavascriptObfuscation
, writerIdentifierPrefix = ""
+ , writerSourceDirectory = "."
+ , writerUserDataDir = Nothing
}
--
@@ -1096,6 +533,6 @@ inDirectory path action = do
readDataFile :: Maybe FilePath -> FilePath -> IO String
readDataFile userDir fname =
case userDir of
- Nothing -> getDataFileName fname >>= readFile
- Just u -> catch (readFile $ u </> fname)
- (\_ -> getDataFileName fname >>= readFile)
+ Nothing -> getDataFileName fname >>= UTF8.readFile
+ Just u -> catch (UTF8.readFile $ u </> fname)
+ (\_ -> getDataFileName fname >>= UTF8.readFile)
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 2238f4da8..372954ae3 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -173,7 +173,7 @@ for = try $ do
string "$for("
id' <- ident
string ")$"
- -- if newline after the "if", then a newline after "endif" will be swallowed
+ -- if newline after the "for", then a newline after "endfor" will be swallowed
multiline <- option False $ try $ skipEndline >> return True
let matches = filter (\(k,_) -> k == id') vars
let indent = replicate pos ' '
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
new file mode 100644
index 000000000..3dd61176c
--- /dev/null
+++ b/src/Text/Pandoc/UTF8.hs
@@ -0,0 +1,72 @@
+{-
+Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.UTF8
+ Copyright : Copyright (C) 2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+UTF-8 aware string IO functions that will work with GHC 6.10 or 6.12.
+-}
+module Text.Pandoc.UTF8 ( readFile
+ , writeFile
+ , getContents
+ , putStr
+ , putStrLn
+ , hPutStr
+ , hPutStrLn
+ )
+
+where
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Lazy.UTF8 (toString, fromString)
+import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn)
+import System.IO (Handle)
+import Control.Monad (liftM)
+
+bom :: B.ByteString
+bom = B.pack [0xEF, 0xBB, 0xBF]
+
+stripBOM :: B.ByteString -> B.ByteString
+stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s
+stripBOM s = s
+
+readFile :: FilePath -> IO String
+readFile = liftM (toString . stripBOM) . B.readFile
+
+writeFile :: FilePath -> String -> IO ()
+writeFile f = B.writeFile f . fromString
+
+getContents :: IO String
+getContents = liftM (toString . stripBOM) B.getContents
+
+putStr :: String -> IO ()
+putStr = B.putStr . fromString
+
+putStrLn :: String -> IO ()
+putStrLn = B.putStrLn . fromString
+
+hPutStr :: Handle -> String -> IO ()
+hPutStr h = B.hPutStr h . fromString
+
+hPutStrLn :: Handle -> String -> IO ()
+hPutStrLn h s = hPutStr h (s ++ "\n")
diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs
new file mode 100644
index 000000000..082644eea
--- /dev/null
+++ b/src/Text/Pandoc/UUID.hs
@@ -0,0 +1,77 @@
+{-
+Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.UUID
+ Copyright : Copyright (C) 2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+UUID generation using Version 4 (random method) described
+in RFC4122. See http://tools.ietf.org/html/rfc4122
+-}
+
+module Text.Pandoc.UUID ( UUID, getRandomUUID ) where
+
+import Text.Printf ( printf )
+import System.Random ( randomIO )
+import Data.Word
+import Data.Bits ( setBit, clearBit )
+import Control.Monad ( liftM )
+
+data UUID = UUID Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8
+ Word8 Word8 Word8 Word8 Word8 Word8 Word8 Word8
+
+instance Show UUID where
+ show (UUID a b c d e f g h i j k l m n o p) =
+ "urn:uuid:" ++
+ printf "%02x" a ++
+ printf "%02x" b ++
+ printf "%02x" c ++
+ printf "%02x" d ++
+ "-" ++
+ printf "%02x" e ++
+ printf "%02x" f ++
+ "-" ++
+ printf "%02x" g ++
+ printf "%02x" h ++
+ "-" ++
+ printf "%02x" i ++
+ printf "%02x" j ++
+ "-" ++
+ printf "%02x" k ++
+ printf "%02x" l ++
+ printf "%02x" m ++
+ printf "%02x" n ++
+ printf "%02x" o ++
+ printf "%02x" p
+
+getRandomUUID :: IO UUID
+getRandomUUID = do
+ let getRN :: a -> IO Word8
+ getRN _ = liftM fromIntegral (randomIO :: IO Int)
+ [a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p] <- mapM getRN ([1..16] :: [Int])
+ -- set variant
+ let i' = i `setBit` 7 `clearBit` 6
+ -- set version (0100 for random)
+ let g' = g `clearBit` 7 `setBit` 6 `clearBit` 5 `clearBit` 4
+ return $ UUID a b c d e f g' h i' j k l m n o p
+
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
new file mode 100644
index 000000000..deaa2fe33
--- /dev/null
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -0,0 +1,283 @@
+{-
+Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.EPUB
+ Copyright : Copyright (C) 2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to EPUB.
+-}
+module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
+import Data.IORef
+import Data.Maybe ( fromMaybe, isNothing )
+import Data.List ( findIndices, isPrefixOf )
+import System.Environment ( getEnv )
+import System.FilePath ( (</>), takeBaseName, takeExtension )
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Lazy.UTF8 ( fromString )
+import Codec.Archive.Zip
+import System.Time
+import Text.Pandoc.Shared hiding ( Element )
+import Text.Pandoc.Definition
+import Control.Monad (liftM)
+import Text.XML.Light hiding (ppTopElement)
+import Text.Pandoc.UUID
+import Text.Pandoc.Writers.HTML
+import Text.Pandoc.Writers.Markdown ( writePlain )
+import Data.Char ( toLower )
+
+-- | Produce an EPUB file from a Pandoc document.
+writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line
+ -> WriterOptions -- ^ Writer options
+ -> Pandoc -- ^ Document to convert
+ -> IO B.ByteString
+writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
+ (TOD epochtime _) <- getClockTime
+ let mkEntry path content = toEntry path epochtime content
+ let opts' = opts{ writerEmailObfuscation = NoObfuscation
+ , writerStandalone = True
+ , writerWrapText = False }
+ let sourceDir = writerSourceDirectory opts'
+
+ -- title page
+ let vars = writerVariables opts'
+ let tpContent = fromString $ writeHtmlString
+ opts'{writerTemplate = pageTemplate
+ ,writerVariables = ("titlepage","yes"):vars}
+ (Pandoc meta [])
+ let tpEntry = mkEntry "title_page.xhtml" tpContent
+
+ -- handle pictures
+ picsRef <- newIORef []
+ Pandoc _ blocks <- liftM (processWith transformBlock) $ processWithM
+ (transformInlines (writerHTMLMathMethod opts) sourceDir picsRef) doc
+ pics <- readIORef picsRef
+ let readPicEntry (oldsrc, newsrc) = readEntry [] oldsrc >>= \e ->
+ return e{ eRelativePath = newsrc }
+ picEntries <- mapM readPicEntry pics
+
+ -- body pages
+ let isH1 (Header 1 _) = True
+ isH1 _ = False
+ let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks
+ let chunks = splitByIndices h1Indices blocks
+ let titleize (Header 1 xs : ys) = Pandoc meta{docTitle = xs} ys
+ titleize xs = Pandoc meta xs
+ let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate
+ , writerHTMLMathMethod = PlainMath }
+ let chapters = map titleize chunks
+ let chapterToEntry :: Int -> Pandoc -> Entry
+ chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $
+ fromString $ chapToHtml chap
+ let chapterEntries = zipWith chapterToEntry [1..] chapters
+
+ -- contents.opf
+ lang <- catch (liftM (takeWhile (/='.')) $ getEnv "lang")
+ (\_ -> return "en-US")
+ uuid <- getRandomUUID
+ let chapterNode ent = unode "item" !
+ [("id", takeBaseName $ eRelativePath ent),
+ ("href", eRelativePath ent),
+ ("media-type", "application/xhtml+xml")] $ ()
+ let chapterRefNode ent = unode "itemref" !
+ [("idref", takeBaseName $ eRelativePath ent)] $ ()
+ let pictureNode ent = unode "item" !
+ [("id", takeBaseName $ eRelativePath ent),
+ ("href", eRelativePath ent),
+ ("media-type", fromMaybe "application/octet-stream"
+ $ imageTypeOf $ eRelativePath ent)] $ ()
+ let plainify t = removeTrailingSpace $
+ writePlain opts'{ writerStandalone = False } $
+ Pandoc meta [Plain t]
+ let plainTitle = plainify $ docTitle meta
+ let plainAuthors = map plainify $ docAuthors meta
+ let contentsData = fromString $ ppTopElement $
+ unode "package" ! [("version","2.0")
+ ,("xmlns","http://www.idpf.org/2007/opf")
+ ,("unique-identifier","BookId")] $
+ [ metadataElement (writerEPUBMetadata opts')
+ uuid lang plainTitle plainAuthors
+ , unode "manifest" $
+ [ unode "item" ! [("id","ncx"), ("href","toc.ncx")
+ ,("media-type","application/x-dtbncx+xml")] $ ()
+ , unode "item" ! [("id","style"), ("href","stylesheet.css")
+ ,("media-type","text/css")] $ ()
+ ] ++
+ map chapterNode (tpEntry : chapterEntries) ++
+ map pictureNode picEntries
+ , unode "spine" ! [("toc","ncx")] $
+ map chapterRefNode (tpEntry : chapterEntries)
+ ]
+ let contentsEntry = mkEntry "content.opf" contentsData
+
+ -- toc.ncx
+ let navPointNode ent n tit = unode "navPoint" !
+ [("id", "navPoint-" ++ show n)
+ ,("playOrder", show n)] $
+ [ unode "navLabel" $ unode "text" tit
+ , unode "content" ! [("src",
+ eRelativePath ent)] $ ()
+ ]
+ let tocData = fromString $ ppTopElement $
+ unode "ncx" ! [("version","2005-1")
+ ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
+ [ unode "head"
+ [ unode "meta" ! [("name","dtb:uid")
+ ,("content", show uuid)] $ ()
+ , unode "meta" ! [("name","dtb:depth")
+ ,("content", "1")] $ ()
+ , unode "meta" ! [("name","dtb:totalPageCount")
+ ,("content", "0")] $ ()
+ , unode "meta" ! [("name","dtb:maxPageNumber")
+ ,("content", "0")] $ ()
+ ]
+ , unode "docTitle" $ unode "text" $ plainTitle
+ , unode "navMap" $ zipWith3 navPointNode (tpEntry : chapterEntries)
+ [1..(length chapterEntries + 1)]
+ ("Title Page" : map (\(Pandoc m _) ->
+ plainify $ docTitle m) chapters)
+ ]
+ let tocEntry = mkEntry "toc.ncx" tocData
+
+ -- mimetype
+ let mimetypeEntry = mkEntry "mimetype" $ fromString "application/epub+zip"
+
+ -- container.xml
+ let containerData = fromString $ ppTopElement $
+ unode "container" ! [("version","1.0")
+ ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
+ unode "rootfiles" $
+ unode "rootfile" ! [("full-path","content.opf")
+ ,("media-type","application/oebps-package+xml")] $ ()
+ let containerEntry = mkEntry "META-INF/container.xml" containerData
+
+ -- stylesheet
+ stylesheet <- case mbStylesheet of
+ Just s -> return s
+ Nothing -> readDataFile (writerUserDataDir opts) "epub.css"
+ let stylesheetEntry = mkEntry "stylesheet.css" $ fromString stylesheet
+
+ -- construct archive
+ let archive = foldr addEntryToArchive emptyArchive
+ (mimetypeEntry : containerEntry : stylesheetEntry : tpEntry :
+ contentsEntry : tocEntry : (picEntries ++ chapterEntries) )
+ return $ fromArchive archive
+
+metadataElement :: String -> UUID -> String -> String -> [String] -> Element
+metadataElement metadataXML uuid lang title authors =
+ let userNodes = parseXML metadataXML
+ elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
+ ,("xmlns:opf","http://www.idpf.org/2007/opf")] $
+ filter isDublinCoreElement $ onlyElems userNodes
+ dublinElements = ["contributor","coverage","creator","date",
+ "description","format","identifier","language","publisher",
+ "relation","rights","source","subject","title","type"]
+ isDublinCoreElement e = qPrefix (elName e) == Just "dc" &&
+ qName (elName e) `elem` dublinElements
+ contains e n = not (null (findElements (QName n Nothing (Just "dc")) e))
+ newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++
+ [ unode "dc:language" lang | not (elt `contains` "language") ] ++
+ [ unode "dc:identifier" ! [("id","BookId")] $ show uuid |
+ not (elt `contains` "identifier") ] ++
+ [ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ]
+ in elt{ elContent = elContent elt ++ map Elem newNodes }
+
+transformInlines :: HTMLMathMethod
+ -> FilePath
+ -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images
+ -> [Inline]
+ -> IO [Inline]
+transformInlines _ _ _ (Image lab (src,_) : xs) | isNothing (imageTypeOf src) =
+ return $ Emph lab : xs
+transformInlines _ sourceDir picsRef (Image lab (src,tit) : xs) = do
+ pics <- readIORef picsRef
+ let oldsrc = sourceDir </> src
+ let ext = takeExtension src
+ newsrc <- case lookup oldsrc pics of
+ Just n -> return n
+ Nothing -> do
+ let new = "images/img" ++ show (length pics) ++ ext
+ modifyIORef picsRef ( (oldsrc, new): )
+ return new
+ return $ Image lab (newsrc, tit) : xs
+transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do
+ let writeHtmlInline opts z = removeTrailingSpace $
+ writeHtmlString opts $ Pandoc (Meta [] [] []) [Plain [z]]
+ mathml = writeHtmlInline defaultWriterOptions{
+ writerHTMLMathMethod = MathML Nothing } x
+ fallback = writeHtmlInline defaultWriterOptions{
+ writerHTMLMathMethod = PlainMath } x
+ inOps = "<ops:switch xmlns:ops=\"http://www.idpf.org/2007/ops\">" ++
+ "<ops:case required-namespace=\"http://www.w3.org/1998/Math/MathML\">" ++
+ mathml ++ "</ops:case><ops:default>" ++ fallback ++ "</ops:default>" ++
+ "</ops:switch>"
+ result = if "<math" `isPrefixOf` mathml then inOps else mathml
+ return $ HtmlInline result : xs
+transformInlines _ _ _ (HtmlInline _ : xs) = return $ Str "" : xs
+transformInlines _ _ _ (Link lab (_,_) : xs) = return $ lab ++ xs
+transformInlines _ _ _ xs = return xs
+
+transformBlock :: Block -> Block
+transformBlock (RawHtml _) = Null
+transformBlock x = x
+
+(!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element
+(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)
+
+-- | Version of 'ppTopElement' that specifies UTF-8 encoding.
+ppTopElement :: Element -> String
+ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . ppElement
+
+imageTypeOf :: FilePath -> Maybe String
+imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of
+ "jpg" -> Just "image/jpeg"
+ "jpeg" -> Just "image/jpeg"
+ "jfif" -> Just "image/jpeg"
+ "png" -> Just "image/png"
+ "gif" -> Just "image/gif"
+ "svg" -> Just "image/svg+xml"
+ _ -> Nothing
+
+pageTemplate :: String
+pageTemplate = unlines
+ [ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+ , "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">"
+ , "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
+ , "<head>"
+ , "<title>$title$</title>"
+ , "<link href=\"stylesheet.css\" type=\"text/css\" rel=\"stylesheet\" />"
+ , "</head>"
+ , "<body>"
+ , "$if(titlepage)$"
+ , "<h1 class=\"title\">$title$</h1>"
+ , "$for(author)$"
+ , "<h2 class=\"author\">$author$</h2>"
+ , "$endfor$"
+ , "$else$"
+ , "<h1>$title$</h1>"
+ , "$body$"
+ , "$endif$"
+ , "</body>"
+ , "</html>"
+ ]
+
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 299471328..08cd18ad0 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -285,9 +285,12 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
attrs = [theclass (unwords classes') | not (null classes')] ++
[prefixedId opts id' | not (null id')] ++
map (\(x,y) -> strAttr x y) keyvals
+ addBird = if "literate" `elem` classes'
+ then unlines . map ("> " ++) . lines
+ else unlines . lines
in return $ pre ! attrs $ thecode <<
(replicate (length leadingBreaks) br +++
- [stringToHtml $ rawCode' ++ "\n"])
+ [stringToHtml $ addBird rawCode'])
Right h -> modify (\st -> st{ stHighlighting = True }) >> return h
blockToHtml opts (BlockQuote blocks) =
-- in S5, treat list in blockquote specially
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 8aa028bd7..720c00ac8 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -338,7 +338,7 @@ inlineToLaTeX (Link txt (src, _)) =
char '}'
inlineToLaTeX (Image _ (source, _)) = do
modify $ \s -> s{ stGraphics = True }
- return $ text $ "\\includegraphics{" ++ source ++ "}"
+ return $ text $ "\\includegraphics{" ++ source ++ "}"
inlineToLaTeX (Note contents) = do
st <- get
put (st {stInNote = True})
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 77dead196..c74cd81f9 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -32,6 +32,7 @@ module Text.Pandoc.Writers.Man ( writeMan) where
import Text.Pandoc.Definition
import Text.Pandoc.Templates
import Text.Pandoc.Shared
+import Text.Pandoc.Readers.TeXMath
import Text.Printf ( printf )
import Data.List ( isPrefixOf, intersperse, intercalate )
import Text.PrettyPrint.HughesPJ hiding ( Str )
@@ -301,9 +302,9 @@ inlineToMan _ Ellipses = return $ text "\\&..."
inlineToMan _ (Code str) =
return $ text $ "\\f[B]" ++ escapeCode str ++ "\\f[]"
inlineToMan _ (Str str) = return $ text $ escapeString str
-inlineToMan opts (Math InlineMath str) = inlineToMan opts (Code str)
+inlineToMan opts (Math InlineMath str) = inlineListToMan opts $ readTeXMath str
inlineToMan opts (Math DisplayMath str) = do
- contents <- inlineToMan opts (Code str)
+ contents <- inlineListToMan opts $ readTeXMath str
return $ text ".RS" $$ contents $$ text ".RE"
inlineToMan _ (TeX _) = return empty
inlineToMan _ (HtmlInline _) = return empty
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 29253ec8e..d6cd2a296 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -32,7 +32,8 @@ Markdown: <http://daringfireball.net/projects/markdown/>
module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where
import Text.Pandoc.Definition
import Text.Pandoc.Templates (renderTemplate)
-import Text.Pandoc.Shared
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
import Text.Pandoc.Blocks
import Text.ParserCombinators.Parsec ( runParser, GenParser )
import Data.List ( group, isPrefixOf, find, intersperse, transpose )
@@ -40,7 +41,7 @@ import Text.PrettyPrint.HughesPJ hiding ( Str )
import Control.Monad.State
type Notes = [[Block]]
-type Refs = KeyTable
+type Refs = [([Inline], Target)]
data WriterState = WriterState { stNotes :: Notes
, stRefs :: Refs
, stPlain :: Bool }
@@ -94,7 +95,7 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do
st <- get
notes' <- notesToMarkdown opts (reverse $ stNotes st)
st' <- get -- note that the notes may contain refs
- refs' <- keyTableToMarkdown opts (reverse $ stRefs st')
+ refs' <- refsToMarkdown opts (reverse $ stRefs st')
let main = render $ body $+$ text "" $+$ notes' $+$ text "" $+$ refs'
let context = writerVariables opts ++
[ ("toc", render toc)
@@ -109,8 +110,8 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do
else return main
-- | Return markdown representation of reference key table.
-keyTableToMarkdown :: WriterOptions -> KeyTable -> State WriterState Doc
-keyTableToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
+refsToMarkdown :: WriterOptions -> Refs -> State WriterState Doc
+refsToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
-- | Return markdown representation of a reference key.
keyToMarkdown :: WriterOptions
@@ -238,7 +239,7 @@ blockToMarkdown opts (Table caption aligns widths headers rows) = do
caption' <- inlineListToMarkdown opts caption
let caption'' = if null caption
then empty
- else text "" $+$ (text "Table: " <> caption')
+ else text "" $+$ (text ": " <> caption')
headers' <- mapM (blockListToMarkdown opts) headers
let alignHeader alignment = case alignment of
AlignLeft -> leftAlignBlock
@@ -372,14 +373,14 @@ inlineToMarkdown opts (Subscript lst) = do
inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst
inlineToMarkdown opts (Quoted SingleQuote lst) = do
contents <- inlineListToMarkdown opts lst
- return $ char '\'' <> contents <> char '\''
+ return $ char '‘' <> contents <> char '’'
inlineToMarkdown opts (Quoted DoubleQuote lst) = do
contents <- inlineListToMarkdown opts lst
- return $ char '"' <> contents <> char '"'
-inlineToMarkdown _ EmDash = return $ text "--"
-inlineToMarkdown _ EnDash = return $ char '-'
-inlineToMarkdown _ Apostrophe = return $ char '\''
-inlineToMarkdown _ Ellipses = return $ text "..."
+ return $ char '“' <> contents <> char '”'
+inlineToMarkdown _ EmDash = return $ char '\8212'
+inlineToMarkdown _ EnDash = return $ char '\8211'
+inlineToMarkdown _ Apostrophe = return $ char '\8217'
+inlineToMarkdown _ Ellipses = return $ char '\8230'
inlineToMarkdown _ (Code str) =
let tickGroups = filter (\s -> '`' `elem` s) $ group str
longest = if null tickGroups
diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs
new file mode 100644
index 000000000..3b5ea7481
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Native.hs
@@ -0,0 +1,86 @@
+{-
+Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Native
+ Copyright : Copyright (C) 2006-2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Utility functions and definitions used by the various Pandoc modules.
+-}
+module Text.Pandoc.Writers.Native ( writeNative )
+where
+import Text.Pandoc.Shared ( WriterOptions )
+import Data.List ( intercalate )
+import Text.Pandoc.Definition
+
+-- | Indent string as a block.
+indentBy :: Int -- ^ Number of spaces to indent the block
+ -> Int -- ^ Number of spaces (rel to block) to indent first line
+ -> String -- ^ Contents of block to indent
+ -> String
+indentBy _ _ [] = ""
+indentBy num first str =
+ let (firstLine:restLines) = lines str
+ firstLineIndent = num + first
+ in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++
+ (intercalate "\n" $ map ((replicate num ' ') ++ ) restLines)
+
+-- | Prettyprint list of Pandoc blocks elements.
+prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks
+ -> [Block] -- ^ List of blocks
+ -> String
+prettyBlockList indent [] = indentBy indent 0 "[]"
+prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++
+ (intercalate "\n, " (map prettyBlock blocks)) ++ " ]"
+
+-- | Prettyprint Pandoc block element.
+prettyBlock :: Block -> String
+prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++
+ (prettyBlockList 2 blocks)
+prettyBlock (OrderedList attribs blockLists) =
+ "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++
+ (intercalate ", " $ map (\blocks -> prettyBlockList 2 blocks)
+ blockLists)) ++ " ]"
+prettyBlock (BulletList blockLists) = "BulletList\n" ++
+ indentBy 2 0 ("[ " ++ (intercalate ", "
+ (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
+prettyBlock (DefinitionList items) = "DefinitionList\n" ++
+ indentBy 2 0 ("[ " ++ (intercalate "\n, "
+ (map (\(term, defs) -> "(" ++ show term ++ ",\n" ++
+ indentBy 3 0 ("[ " ++ (intercalate ", "
+ (map (\blocks -> prettyBlockList 2 blocks) defs)) ++ "]") ++
+ ")") items))) ++ " ]"
+prettyBlock (Table caption aligns widths header rows) =
+ "Table " ++ show caption ++ " " ++ show aligns ++ " " ++
+ show widths ++ "\n" ++ prettyRow header ++ " [\n" ++
+ (intercalate ",\n" (map prettyRow rows)) ++ " ]"
+ where prettyRow cols = indentBy 2 0 ("[ " ++ (intercalate ", "
+ (map (\blocks -> prettyBlockList 2 blocks)
+ cols))) ++ " ]"
+prettyBlock block = show block
+
+-- | Prettyprint Pandoc document.
+writeNative :: WriterOptions -> Pandoc -> String
+writeNative _ (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++
+ ")\n" ++ (prettyBlockList 0 blocks) ++ "\n"
+
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
new file mode 100644
index 000000000..5aa0fd310
--- /dev/null
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -0,0 +1,83 @@
+{-
+Copyright (C) 2008-2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.ODT
+ Copyright : Copyright (C) 2008-2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to ODT.
+-}
+module Text.Pandoc.Writers.ODT ( writeODT ) where
+import Data.IORef
+import System.FilePath ( (</>), takeExtension )
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Lazy.UTF8 ( fromString )
+import Codec.Archive.Zip
+import System.Time
+import Paths_pandoc ( getDataFileName )
+import Text.Pandoc.Shared ( WriterOptions(..) )
+import Text.Pandoc.Definition
+import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument )
+import System.Directory
+import Control.Monad (liftM)
+
+-- | Produce an ODT file from a Pandoc document.
+writeODT :: Maybe FilePath -- ^ Path specified by --reference-odt
+ -> WriterOptions -- ^ Writer options
+ -> Pandoc -- ^ Document to convert
+ -> IO B.ByteString
+writeODT mbRefOdt opts doc = do
+ let datadir = writerUserDataDir opts
+ refArchive <- liftM toArchive $
+ case mbRefOdt of
+ Just f -> B.readFile f
+ Nothing -> do
+ let defaultODT = getDataFileName "reference.odt" >>= B.readFile
+ case datadir of
+ Nothing -> defaultODT
+ Just d -> do
+ exists <- doesFileExist (d </> "reference.odt")
+ if exists
+ then B.readFile (d </> "reference.odt")
+ else defaultODT
+ -- handle pictures
+ picEntriesRef <- newIORef ([] :: [Entry])
+ let sourceDir = writerSourceDirectory opts
+ doc' <- processWithM (transformPic sourceDir picEntriesRef) doc
+ let newContents = writeOpenDocument opts doc'
+ (TOD epochtime _) <- getClockTime
+ let contentEntry = toEntry "content.xml" epochtime $ fromString newContents
+ picEntries <- readIORef picEntriesRef
+ let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries)
+ return $ fromArchive archive
+
+transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline
+transformPic sourceDir entriesRef (Image lab (src,tit)) = do
+ entries <- readIORef entriesRef
+ let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src
+ catch (readEntry [] (sourceDir </> src) >>= \entry ->
+ modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >>
+ return (Image lab (newsrc, tit)))
+ (\_ -> return (Emph lab))
+transformPic _ _ x = return x
+
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index f4dfb2aa6..14566252c 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -39,10 +39,12 @@ import Text.PrettyPrint.HughesPJ hiding ( Str )
import Control.Monad.State
import Control.Applicative ( (<$>) )
+type Refs = [([Inline], Target)]
+
data WriterState =
WriterState { stNotes :: [[Block]]
- , stLinks :: KeyTable
- , stImages :: KeyTable
+ , stLinks :: Refs
+ , stImages :: Refs
, stHasMath :: Bool
, stOptions :: WriterOptions
}
@@ -65,8 +67,8 @@ pandocToRST (Pandoc (Meta tit auth dat) blocks) = do
body <- blockListToRST blocks
notes <- liftM (reverse . stNotes) get >>= notesToRST
-- note that the notes may contain refs, so we do them first
- refs <- liftM (reverse . stLinks) get >>= keyTableToRST
- pics <- liftM (reverse . stImages) get >>= pictTableToRST
+ refs <- liftM (reverse . stLinks) get >>= refsToRST
+ pics <- liftM (reverse . stImages) get >>= pictRefsToRST
hasMath <- liftM stHasMath get
let main = render $ body $+$ notes $+$ text "" $+$ refs $+$ pics
let context = writerVariables opts ++
@@ -80,8 +82,8 @@ pandocToRST (Pandoc (Meta tit auth dat) blocks) = do
else return main
-- | Return RST representation of reference key table.
-keyTableToRST :: KeyTable -> State WriterState Doc
-keyTableToRST refs = mapM keyToRST refs >>= return . vcat
+refsToRST :: Refs -> State WriterState Doc
+refsToRST refs = mapM keyToRST refs >>= return . vcat
-- | Return RST representation of a reference key.
keyToRST :: ([Inline], (String, String))
@@ -107,8 +109,8 @@ noteToRST num note = do
return $ marker $$ nest 3 contents
-- | Return RST representation of picture reference table.
-pictTableToRST :: KeyTable -> State WriterState Doc
-pictTableToRST refs = mapM pictToRST refs >>= return . vcat
+pictRefsToRST :: Refs -> State WriterState Doc
+pictRefsToRST refs = mapM pictToRST refs >>= return . vcat
-- | Return RST representation of a picture substitution reference.
pictToRST :: ([Inline], (String, String))
@@ -280,16 +282,16 @@ inlineToRST (Subscript lst) = do
inlineToRST (SmallCaps lst) = inlineListToRST lst
inlineToRST (Quoted SingleQuote lst) = do
contents <- inlineListToRST lst
- return $ char '\'' <> contents <> char '\''
+ return $ char '‘' <> contents <> char '’'
inlineToRST (Quoted DoubleQuote lst) = do
contents <- inlineListToRST lst
- return $ char '"' <> contents <> char '"'
+ return $ char '“' <> contents <> char '”'
inlineToRST (Cite _ lst) =
inlineListToRST lst
-inlineToRST EmDash = return $ text "--"
-inlineToRST EnDash = return $ char '-'
-inlineToRST Apostrophe = return $ char '\''
-inlineToRST Ellipses = return $ text "..."
+inlineToRST EmDash = return $ char '\8212'
+inlineToRST EnDash = return $ char '\8211'
+inlineToRST Apostrophe = return $ char '\8217'
+inlineToRST Ellipses = return $ char '\8230'
inlineToRST (Code str) = return $ text $ "``" ++ str ++ "``"
inlineToRST (Str str) = return $ text $ escapeString str
inlineToRST (Math t str) = do
diff --git a/src/markdown2pdf.hs b/src/markdown2pdf.hs
index d713ae263..cc6a034c0 100644
--- a/src/markdown2pdf.hs
+++ b/src/markdown2pdf.hs
@@ -9,14 +9,7 @@ import Control.Exception (tryJust, bracket)
import System.IO (stderr)
import System.IO.Error (isDoesNotExistError)
import System.Environment ( getArgs, getProgName )
--- Note: ghc >= 6.12 (base >=4.2) supports unicode through iconv
--- So we use System.IO.UTF8 only if we have an earlier version
-#if MIN_VERSION_base(4,2,0)
-import System.IO (hPutStrLn)
-#else
-import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents )
-import System.IO.UTF8
-#endif
+import qualified Text.Pandoc.UTF8 as UTF8
import System.Exit (ExitCode (..), exitWith)
import System.FilePath
import System.Directory
@@ -57,7 +50,7 @@ runLatexRaw latexProgram file = do
takeDirectory file, dropExtension file] >> return ()
let pdfFile = replaceExtension file "pdf"
let logFile = replaceExtension file "log"
- txt <- tryJust (guard . isDoesNotExistError) (readFile logFile)
+ txt <- tryJust (guard . isDoesNotExistError) (UTF8.readFile logFile)
let checks = checkLatex $ either (const "") id txt
case checks of
-- err , bib , ref , msg
@@ -122,13 +115,13 @@ runBibtex file = do
exit :: String -> IO a
exit x = do
progName <- getProgName
- hPutStrLn stderr $ progName ++ ": " ++ x
+ UTF8.hPutStrLn stderr $ progName ++ ": " ++ x
exitWith $ ExitFailure 1
saveStdin :: FilePath -> IO (Either String FilePath)
saveStdin file = do
- text <- getContents
- writeFile file text
+ text <- UTF8.getContents
+ UTF8.writeFile file text
fileExist <- doesFileExist file
case fileExist of
False -> return $ Left $! "Could not create " ++ file
@@ -137,7 +130,7 @@ saveStdin file = do
saveOutput :: FilePath -> FilePath -> IO ()
saveOutput input output = do
copyFile input output
- hPutStrLn stderr $! "Created " ++ output
+ UTF8.hPutStrLn stderr $! "Created " ++ output
main :: IO ()
main = bracket
@@ -161,7 +154,8 @@ main = bracket
"--number-sections","--include-in-header",
"--include-before-body","--include-after-body",
"--custom-header","--output",
- "--template", "--variable"]
+ "--template", "--variable",
+ "--csl", "--biblio", "--biblio-format"]
let isOpt ('-':_) = True
isOpt _ = False
let opts = filter isOpt args
@@ -170,8 +164,8 @@ main = bracket
any (\o -> (o ++ "=") `isPrefixOf` x) goodoptslong
unless (all isGoodopt opts) $ do
(code, out, _err) <- readProcessWithExitCode "pandoc" ["--help"] ""
- putStrLn "markdown2pdf [OPTIONS] [FILES]\nOptions:"
- putStr $ unlines $
+ UTF8.putStrLn "markdown2pdf [OPTIONS] [FILES]\nOptions:"
+ UTF8.putStr $ unlines $
filter (\l -> any (`isInfixOf` l) goodoptslong) $ lines out
exitWith code
diff --git a/src/pandoc.hs b/src/pandoc.hs
index 0560efc0a..a2be60d52 100644
--- a/src/pandoc.hs
+++ b/src/pandoc.hs
@@ -30,9 +30,9 @@ writers.
-}
module Main where
import Text.Pandoc
-import Text.Pandoc.ODT
import Text.Pandoc.Writers.S5 (s5HeaderIncludes)
-import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile )
+import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile,
+ headerShift )
#ifdef _HIGHLIGHTING
import Text.Pandoc.Highlighting ( languages )
#endif
@@ -45,14 +45,7 @@ import Data.Char ( toLower, isDigit )
import Data.List ( intercalate, isSuffixOf )
import System.Directory ( getAppUserDataDirectory )
import System.IO ( stdout, stderr )
--- Note: ghc >= 6.12 (base >=4.2) supports unicode through iconv
--- So we use System.IO.UTF8 only if we have an earlier version
-#if MIN_VERSION_base(4,2,0)
-import System.IO ( hPutStr, hPutStrLn )
-#else
-import Prelude hiding ( putStr, putStrLn, writeFile, readFile, getContents )
-import System.IO.UTF8
-#endif
+import qualified Text.Pandoc.UTF8 as UTF8
#ifdef _CITEPROC
import Text.CSL
import Text.Pandoc.Biblio
@@ -60,7 +53,9 @@ import Text.Pandoc.Biblio
import Control.Monad (when, unless, liftM)
import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..))
import Network.URI (parseURI, isURI)
-import Data.ByteString.Lazy.UTF8 (toString)
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Lazy.UTF8 (toString, fromString)
+import Codec.Binary.UTF8.String (decodeString)
copyrightMessage :: String
copyrightMessage = "\nCopyright (C) 2006-2010 John MacFarlane\n" ++
@@ -108,13 +103,14 @@ readPandoc _ = read
-- | Association list of formats and writers.
writers :: [ ( String, WriterOptions -> Pandoc -> String ) ]
-writers = [("native" , writeDoc)
+writers = [("native" , writeNative)
,("html" , writeHtmlString)
,("html+lhs" , writeHtmlString)
,("s5" , writeS5String)
,("docbook" , writeDocbook)
,("opendocument" , writeOpenDocument)
- ,("odt" , writeOpenDocument)
+ ,("odt" , \_ _ -> "")
+ ,("epub" , \_ _ -> "")
,("latex" , writeLaTeX)
,("latex+lhs" , writeLaTeX)
,("context" , writeConTeXt)
@@ -130,17 +126,7 @@ writers = [("native" , writeDoc)
]
isNonTextOutput :: String -> Bool
-isNonTextOutput = (`elem` ["odt"])
-
--- | Writer for Pandoc native format.
-writeDoc :: WriterOptions -> Pandoc -> String
-writeDoc _ = prettyPandoc
-
-headerShift :: Int -> Pandoc -> Pandoc
-headerShift n = processWith shift
- where shift :: Block -> Block
- shift (Header level inner) = Header (level + n) inner
- shift x = x
+isNonTextOutput = (`elem` ["odt","epub"])
-- | Data structure for command line options.
data Opt = Opt
@@ -154,8 +140,6 @@ data Opt = Opt
, optTransforms :: [Pandoc -> Pandoc] -- ^ Doc transforms to apply
, optTemplate :: String -- ^ Custom template
, optVariables :: [(String,String)] -- ^ Template variables to set
- , optBefore :: [String] -- ^ Texts to include before body
- , optAfter :: [String] -- ^ Texts to include after body
, optOutputFile :: String -- ^ Name of output file
, optNumberSections :: Bool -- ^ Number sections in LaTeX
, optIncremental :: Bool -- ^ Use incremental lists in S5
@@ -163,6 +147,8 @@ data Opt = Opt
, optSmart :: Bool -- ^ Use smart typography
, optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math
, optReferenceODT :: Maybe FilePath -- ^ Path of reference.odt
+ , optEPUBStylesheet :: Maybe String -- ^ EPUB stylesheet
+ , optEPUBMetadata :: String -- ^ EPUB metadata
, optDumpArgs :: Bool -- ^ Output command-line arguments
, optIgnoreArgs :: Bool -- ^ Ignore command-line arguments
, optStrict :: Bool -- ^ Use strict markdown syntax
@@ -194,8 +180,6 @@ defaultOpts = Opt
, optTransforms = []
, optTemplate = ""
, optVariables = []
- , optBefore = []
- , optAfter = []
, optOutputFile = "-" -- "-" means stdout
, optNumberSections = False
, optIncremental = False
@@ -203,6 +187,8 @@ defaultOpts = Opt
, optSmart = False
, optHTMLMathMethod = PlainMath
, optReferenceODT = Nothing
+ , optEPUBStylesheet = Nothing
+ , optEPUBMetadata = ""
, optDumpArgs = False
, optIgnoreArgs = False
, optStrict = False
@@ -343,7 +329,7 @@ options =
"references" -> return ReferenceObfuscation
"javascript" -> return JavascriptObfuscation
"none" -> return NoObfuscation
- _ -> hPutStrLn stderr ("Error: Unknown obfuscation method: " ++ arg) >>
+ _ -> UTF8.hPutStrLn stderr ("Error: Unknown obfuscation method: " ++ arg) >>
exitWith (ExitFailure 6)
return opt { optEmailObfuscation = method })
"none|javascript|references")
@@ -377,7 +363,7 @@ options =
return opt{ optTransforms =
headerShift shift : oldTransforms }
else do
- hPutStrLn stderr $ "base-header-level must be a number >= 1"
+ UTF8.hPutStrLn stderr $ "base-header-level must be a number >= 1"
exitWith $ ExitFailure 19)
"LEVEL")
"" -- "Headers base level"
@@ -385,7 +371,7 @@ options =
, Option "" ["template"]
(ReqArg
(\arg opt -> do
- text <- readFile arg
+ text <- UTF8.readFile arg
return opt{ optTemplate = text,
optStandalone = True })
"FILENAME")
@@ -399,7 +385,7 @@ options =
let newvars = optVariables opt ++ [(k,v)]
return opt{ optVariables = newvars }
_ -> do
- hPutStrLn stderr $ "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)"
+ UTF8.hPutStrLn stderr $ "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)"
exitWith $ ExitFailure 17)
"FILENAME")
"" -- "Use custom template"
@@ -417,7 +403,7 @@ options =
, Option "H" ["include-in-header"]
(ReqArg
(\arg opt -> do
- text <- readFile arg
+ text <- UTF8.readFile arg
-- add new ones to end, so they're included in order specified
let newvars = optVariables opt ++ [("header-includes",text)]
return opt { optVariables = newvars,
@@ -428,7 +414,7 @@ options =
, Option "B" ["include-before-body"]
(ReqArg
(\arg opt -> do
- text <- readFile arg
+ text <- UTF8.readFile arg
-- add new ones to end, so they're included in order specified
let newvars = optVariables opt ++ [("include-before",text)]
return opt { optVariables = newvars,
@@ -439,7 +425,7 @@ options =
, Option "A" ["include-after-body"]
(ReqArg
(\arg opt -> do
- text <- readFile arg
+ text <- UTF8.readFile arg
-- add new ones to end, so they're included in order specified
let newvars = optVariables opt ++ [("include-after",text)]
return opt { optVariables = newvars,
@@ -450,7 +436,7 @@ options =
, Option "C" ["custom-header"]
(ReqArg
(\arg opt -> do
- text <- readFile arg
+ text <- UTF8.readFile arg
let newVars = ("legacy-header", text) : optVariables opt
return opt { optVariables = newVars
, optStandalone = True })
@@ -473,12 +459,28 @@ options =
"FILENAME")
"" -- "Path of custom reference.odt"
+ , Option "" ["epub-stylesheet"]
+ (ReqArg
+ (\arg opt -> do
+ text <- UTF8.readFile arg
+ return opt { optEPUBStylesheet = Just text })
+ "FILENAME")
+ "" -- "Path of epub.css"
+
+ , Option "" ["epub-metadata"]
+ (ReqArg
+ (\arg opt -> do
+ text <- UTF8.readFile arg
+ return opt { optEPUBMetadata = text })
+ "FILENAME")
+ "" -- "Path of epub metadata file"
+
, Option "D" ["print-default-template"]
(ReqArg
(\arg _ -> do
templ <- getDefaultTemplate Nothing arg
case templ of
- Right t -> hPutStr stdout t
+ Right t -> UTF8.hPutStr stdout t
Left e -> error $ show e
exitWith ExitSuccess)
"FORMAT")
@@ -520,7 +522,7 @@ options =
(NoArg
(\_ -> do
prg <- getProgName
- hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ compileInfo ++
+ UTF8.hPutStrLn stdout (prg ++ " " ++ pandocVersion ++ compileInfo ++
copyrightMessage)
exitWith ExitSuccess ))
"" -- "Print version"
@@ -529,7 +531,7 @@ options =
(NoArg
(\_ -> do
prg <- getProgName
- hPutStr stdout (usageMessage prg options)
+ UTF8.hPutStr stdout (usageMessage prg options)
exitWith ExitSuccess ))
"" -- "Show help"
]
@@ -586,13 +588,14 @@ defaultWriterName x =
".texinfo" -> "texinfo"
".db" -> "docbook"
".odt" -> "odt"
+ ".epub" -> "epub"
['.',y] | y `elem` ['1'..'9'] -> "man"
_ -> "html"
main :: IO ()
main = do
- rawArgs <- getArgs
+ rawArgs <- liftM (map decodeString) getArgs
prg <- getProgName
let compatMode = (prg == "hsmarkdown")
@@ -602,8 +605,8 @@ main = do
unless (null errors) $
do name <- getProgName
- mapM_ (\e -> hPutStr stderr (name ++ ": ") >> hPutStr stderr e) errors
- hPutStrLn stderr $ "Try " ++ name ++ " --help for more information."
+ mapM_ (\e -> UTF8.hPutStr stderr (name ++ ": ") >> UTF8.hPutStr stderr e) errors
+ UTF8.hPutStrLn stderr $ "Try " ++ name ++ " --help for more information."
exitWith $ ExitFailure 2
let defaultOpts' = if compatMode
@@ -622,8 +625,6 @@ main = do
, optWriter = writerName
, optParseRaw = parseRaw
, optVariables = variables
- , optBefore = befores
- , optAfter = afters
, optTableOfContents = toc
, optTransforms = transforms
, optTemplate = template
@@ -634,6 +635,8 @@ main = do
, optSmart = smart
, optHTMLMathMethod = mathMethod
, optReferenceODT = referenceODT
+ , optEPUBStylesheet = epubStylesheet
+ , optEPUBMetadata = epubMetadata
, optDumpArgs = dumpArgs
, optIgnoreArgs = ignoreArgs
, optStrict = strict
@@ -652,13 +655,13 @@ main = do
} = opts
when dumpArgs $
- do hPutStrLn stdout outputFile
- mapM_ (\arg -> hPutStrLn stdout arg) args
+ do UTF8.hPutStrLn stdout outputFile
+ mapM_ (\arg -> UTF8.hPutStrLn stdout arg) args
exitWith ExitSuccess
-- warn about deprecated options
case lookup "legacy-header" variables of
- Just _ -> hPutStrLn stderr $
+ Just _ -> UTF8.hPutStrLn stderr $
"Warning: The -C/--custom-header is deprecated.\n" ++
"Please transition to using --template instead."
Nothing -> return ()
@@ -687,9 +690,13 @@ main = do
Just r -> return r
Nothing -> error ("Unknown reader: " ++ readerName')
- writer <- case (lookup writerName' writers) of
- Just r -> return r
- Nothing -> error ("Unknown writer: " ++ writerName')
+ let writer = case lookup writerName' writers of
+ Just _ | writerName' == "epub" -> writeEPUB epubStylesheet
+ Just _ | writerName' == "odt" -> writeODT referenceODT
+ Just r -> \o ->
+ return . fromString . r o
+ Nothing -> error $ "Unknown writer: " ++
+ writerName'
templ <- getDefaultTemplate datadir writerName'
let defaultTemplate = case templ of
@@ -722,6 +729,10 @@ main = do
return $ ("mathml-script", s) : variables'
_ -> return variables'
+ let sourceDir = if null sources
+ then "."
+ else takeDirectory (head sources)
+
let startParserState =
defaultParserState { stateParseRaw = parseRaw,
stateTabStop = tabStop,
@@ -737,13 +748,13 @@ main = do
stateColumns = columns,
stateStrict = strict,
stateIndentedCodeClasses = codeBlockClasses }
+
let writerOptions = WriterOptions { writerStandalone = standalone',
writerTemplate = if null template
then defaultTemplate
else template,
writerVariables = variables'',
- writerIncludeBefore = concat befores,
- writerIncludeAfter = concat afters,
+ writerEPUBMetadata = epubMetadata,
writerTabStop = tabStop,
writerTableOfContents = toc &&
writerName' /= "s5",
@@ -761,23 +772,21 @@ main = do
writerEmailObfuscation = if strict
then ReferenceObfuscation
else obfuscationMethod,
- writerIdentifierPrefix = idPrefix }
+ writerIdentifierPrefix = idPrefix,
+ writerSourceDirectory = sourceDir,
+ writerUserDataDir = datadir }
when (isNonTextOutput writerName' && outputFile == "-") $
- do hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++
+ do UTF8.hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++
"Specify an output file using the -o option.")
exitWith $ ExitFailure 5
- let sourceDirRelative = if null sources
- then ""
- else takeDirectory (head sources)
-
let readSources [] = mapM readSource ["-"]
readSources srcs = mapM readSource srcs
- readSource "-" = getContents
+ readSource "-" = UTF8.getContents
readSource src = case parseURI src of
Just u -> readURI u
- Nothing -> readFile src
+ Nothing -> UTF8.readFile src
readURI uri = simpleHTTP (mkRequest GET uri) >>= getResponseBody >>=
return . toString -- treat all as UTF8
@@ -794,10 +803,8 @@ main = do
return doc'
#endif
- let writerOutput = writer writerOptions doc'' ++ "\n"
+ writerOutput <- writer writerOptions doc''
- case writerName' of
- "odt" -> saveOpenDocumentAsODT datadir outputFile sourceDirRelative referenceODT writerOutput
- _ -> if outputFile == "-"
- then putStr writerOutput
- else writeFile outputFile writerOutput
+ if outputFile == "-"
+ then B.putStrLn writerOutput
+ else B.writeFile outputFile writerOutput