diff options
-rw-r--r-- | Main.hs | 106 | ||||
-rw-r--r-- | README | 27 | ||||
-rw-r--r-- | Text/Pandoc/PDF.hs | 202 | ||||
-rw-r--r-- | pandoc.cabal | 1 |
4 files changed, 176 insertions, 160 deletions
@@ -20,10 +20,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Main Copyright : Copyright (C) 2006-8 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> - Stability : alpha + Stability : alpha Portability : portable Parses command-line options and calls the appropriate readers and @@ -32,6 +32,7 @@ writers. module Main where import Text.Pandoc import Text.Pandoc.ODT +import Text.Pandoc.PDF import Text.Pandoc.Shared ( joinWithSep, HTMLMathMethod (..) ) import Text.Pandoc.Highlighting ( languages ) import System.Environment ( getArgs, getProgName, getEnvironment ) @@ -98,7 +99,7 @@ readers = [("native" , readPandoc) -- | Reader for native Pandoc format. readPandoc :: ParserState -> String -> Pandoc readPandoc _ input = read input - + -- | Association list of formats and pairs of writers and default headers. writers :: [ ( String, ( WriterOptions -> Pandoc -> String, String ) ) ] writers = [("native" , (writeDoc, "")) @@ -108,6 +109,7 @@ writers = [("native" , (writeDoc, "")) ,("opendocument" , (writeOpenDocument, defaultOpenDocumentHeader)) ,("odt" , (writeOpenDocument, defaultOpenDocumentHeader)) ,("latex" , (writeLaTeX, defaultLaTeXHeader)) + ,("pdf" , (writeLaTeX, defaultLaTeXHeader)) ,("context" , (writeConTeXt, defaultConTeXtHeader)) ,("texinfo" , (writeTexinfo, "")) ,("man" , (writeMan, "")) @@ -118,8 +120,7 @@ writers = [("native" , (writeDoc, "")) ] isNonTextOutput :: String -> Bool -isNonTextOutput "odt" = True -isNonTextOutput _ = False +isNonTextOutput = (`elem` ["odt", "pdf"]) -- | Writer for Pandoc native format. writeDoc :: WriterOptions -> Pandoc -> String @@ -205,7 +206,7 @@ options = (\arg opt -> return opt { optWriter = map toLower arg }) "FORMAT") "" -- ("(" ++ (joinWithSep ", " $ map fst writers) ++ ")") - + , Option "s" ["standalone"] (NoArg (\opt -> return opt { optStandalone = True })) @@ -250,7 +251,7 @@ options = , Option "m" ["asciimathml"] (OptArg - (\arg opt -> return opt { optHTMLMathMethod = + (\arg opt -> return opt { optHTMLMathMethod = ASCIIMathML arg }) "URL") "" -- "Use ASCIIMathML script in html output" @@ -290,13 +291,13 @@ options = , Option "" ["toc", "table-of-contents"] (NoArg (\opt -> return opt { optTableOfContents = True })) - "" -- "Include table of contents" + "" -- "Include table of contents" , Option "c" ["css"] (ReqArg - (\arg opt -> do + (\arg opt -> do let old = optCSS opt - return opt { optCSS = old ++ [arg], + return opt { optCSS = old ++ [arg], optStandalone = True }) "CSS") "" -- "Link to CSS style sheet" @@ -340,17 +341,17 @@ options = , Option "T" ["title-prefix"] (ReqArg - (\arg opt -> return opt { optTitlePrefix = arg, + (\arg opt -> return opt { optTitlePrefix = arg, optStandalone = True }) "STRING") "" -- "String to prefix to HTML window title" - + , Option "D" ["print-default-header"] (ReqArg (\arg _ -> do let header = case (lookup arg writers) of Just (_, h) -> h - Nothing -> error ("Unknown reader: " ++ arg) + Nothing -> error ("Unknown reader: " ++ arg) hPutStr stdout header exitWith ExitSuccess) "FORMAT") @@ -376,7 +377,7 @@ options = (NoArg (\opt -> return opt { optIgnoreArgs = True })) "" -- "Ignore command-line arguments." - + , Option "v" ["version"] (NoArg (\_ -> do @@ -398,15 +399,15 @@ options = -- Returns usage message usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String usageMessage programName opts = usageInfo - (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++ - (joinWithSep ", " $ map fst readers) ++ "\nOutput formats: " ++ + (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++ + (joinWithSep ", " $ map fst readers) ++ "\nOutput formats: " ++ (joinWithSep ", " $ map fst writers) ++ "\nOptions:") opts - + -- Determine default reader based on source file extensions defaultReaderName :: [FilePath] -> String defaultReaderName [] = "markdown" -defaultReaderName (x:xs) = +defaultReaderName (x:xs) = case takeExtension (map toLower x) of ".xhtml" -> "html" ".html" -> "html" @@ -441,6 +442,7 @@ defaultWriterName x = ".texinfo" -> "texinfo" ".db" -> "docbook" ".odt" -> "odt" + ".pdf" -> "pdf" ['.',y] | y `elem` ['1'..'9'] -> "man" _ -> "html" @@ -464,7 +466,7 @@ main = do else return () - let defaultOpts' = if compatMode + let defaultOpts' = if compatMode then defaultOpts { optReader = "markdown" , optWriter = "html" , optStrict = True } @@ -513,11 +515,11 @@ main = do let sources = if ignoreArgs then [] else args -- assign reader and writer based on options and filenames - let readerName' = if null readerName + let readerName' = if null readerName then defaultReaderName sources else readerName - let writerName' = if null writerName + let writerName' = if null writerName then defaultWriterName outputFile else writerName @@ -539,74 +541,76 @@ main = do -- remove DOS line endings tabFilter _ ('\r':'\n':xs) = '\n':(tabFilter tabStop xs) tabFilter _ ('\r':xs) = '\n':(tabFilter tabStop xs) - tabFilter spsToNextStop ('\t':xs) = + tabFilter spsToNextStop ('\t':xs) = if preserveTabs - then '\t':(tabFilter tabStop xs) - else replicate spsToNextStop ' ' ++ tabFilter tabStop xs - tabFilter 1 (x:xs) = + then '\t':(tabFilter tabStop xs) + else replicate spsToNextStop ' ' ++ tabFilter tabStop xs + tabFilter 1 (x:xs) = x:(tabFilter tabStop xs) - tabFilter spsToNextStop (x:xs) = + tabFilter spsToNextStop (x:xs) = x:(tabFilter (spsToNextStop - 1) xs) - let standalone' = (standalone && not strict) || writerName' == "odt" + let standalone' = (standalone && not strict) || isNonTextOutput writerName' #ifdef _CITEPROC refs <- if null modsFile then return [] else readModsColletionFile modsFile #endif - let startParserState = + let startParserState = defaultParserState { stateParseRaw = parseRaw, - stateTabStop = tabStop, + stateTabStop = tabStop, stateSanitizeHTML = sanitize, stateStandalone = standalone', #ifdef _CITEPROC stateCitations = map citeKey refs, #endif - stateSmart = smart || writerName' `elem` + stateSmart = smart || writerName' `elem` ["latex", "context"], stateColumns = columns, stateStrict = strict } let csslink = if null css then "" - else concatMap + else concatMap (\f -> "<link rel=\"stylesheet\" href=\"" ++ f ++ "\" type=\"text/css\" media=\"all\" />\n") css - let header = (if (customHeader == "DEFAULT") + let header = (if (customHeader == "DEFAULT") then defaultHeader else customHeader) ++ csslink ++ includeHeader let writerOptions = WriterOptions { writerStandalone = standalone', - writerHeader = header, + writerHeader = header, writerTitlePrefix = titlePrefix, - writerTabStop = tabStop, + writerTabStop = tabStop, writerTableOfContents = toc && (not strict) && writerName' /= "s5", writerHTMLMathMethod = mathMethod, writerS5 = (writerName' == "s5"), writerIgnoreNotes = False, - writerIncremental = incremental, + writerIncremental = incremental, writerNumberSections = numberSections, - writerIncludeBefore = includeBefore, + writerIncludeBefore = includeBefore, writerIncludeAfter = includeAfter, writerStrictMarkdown = strict, writerReferenceLinks = referenceLinks, writerWrapText = wrap } - let writeOutput = if writerName' == "odt" - then if outputFile == "-" - then \_ -> do - hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ - " output to stdout.\n" ++ - "Specify an output file using the -o option.") - exitWith $ ExitFailure 5 - else let sourceDirRelative = if null sources - then "" - else takeDirectory (head sources) - in saveOpenDocumentAsODT outputFile sourceDirRelative - else if outputFile == "-" - then putStrLn - else writeFile outputFile . (++ "\n") + if isNonTextOutput writerName' && outputFile == "-" + then do hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++ + "Specify an output file using the -o option.") + exitWith $ ExitFailure 5 + else return () + + let sourceDirRelative = if null sources + then "" + else takeDirectory (head sources) + + let writeOutput = case writerName' of + "odt" -> saveOpenDocumentAsODT outputFile sourceDirRelative + "pdf" -> saveLaTeXAsPDF outputFile sourceDirRelative + _ -> if outputFile == "-" + then putStrLn + else writeFile outputFile . (++ "\n") fmap (reader startParserState . tabFilter tabStop . joinWithSep "\n") (readSources sources) >>= @@ -615,7 +619,7 @@ main = do #endif writeOutput . writer writerOptions - where + where readSources [] = mapM readSource ["-"] readSources sources = mapM readSource sources readSource "-" = getContents @@ -82,11 +82,28 @@ To convert `hello.html` from html to markdown: pandoc -f html -t markdown hello.html Supported output formats include `markdown`, `latex`, `context` -(ConTeXt), `html`, `rtf` (rich text format), `rst` (reStructuredText), -`docbook` (DocBook XML), `opendocument` (OpenDocument XML), `odt` -(OpenOffice text document), `texinfo`, (GNU Texinfo), `mediawiki` -(MediaWiki markup), `man` (groff man), and `s5` (which produces an -HTML file that acts like powerpoint). +(ConTeXt), `pdf`, `html`, `rtf` (rich text format), `rst` +(reStructuredText), `docbook` (DocBook XML), `opendocument` +(OpenDocument XML), `odt` (OpenOffice text document), `texinfo`, (GNU +Texinfo), `mediawiki` (MediaWiki markup), `man` (groff man), and `s5` +(which produces an HTML file that acts like powerpoint). + +Notes: + +- For `odt` output, you must have `zip` in the path. If you + don't have it installed, you can get the free [Info-ZIP]. + +- For `pdf` output, you must have `pdflatex` and `bibtex` in the + path. You should also have the following LaTeX packages installed: + `unicode`, `fancyhdr` (if you have verbatim text in footnotes), + `graphicx` (if you use images), `array` (if you use tables), and + `ulem` (if you use strikeout text). If they are not already included + in your LaTeX distribution, you can get them from [CTAN]. A full + [TeX Live] or [MacTeX] distribution will have all of these packages. + + [Info-ZIP]: http://www.info-zip.org/Zip.html + [TeX Live]: http://www.tug.org/texlive/ + [MacTeX]: http://www.tug.org/mactex/ Supported input formats include `markdown`, `html`, `latex`, and `rst`. Note that the `rst` reader only parses a subset of reStructuredText diff --git a/Text/Pandoc/PDF.hs b/Text/Pandoc/PDF.hs index 5b900bf03..fa1e6dcf5 100644 --- a/Text/Pandoc/PDF.hs +++ b/Text/Pandoc/PDF.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE TemplateHaskell, CPP #-} +{-# LANGUAGE CPP #-} {- Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu> @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | - Module : Text.Pandoc.ODT + Module : Text.Pandoc.PDF Copyright : Copyright (C) 2006-7 John MacFarlane License : GNU GPL, version 2 or above @@ -26,119 +26,113 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Stability : alpha Portability : portable -Functions for producing an ODT file from OpenDocument XML. +Functions for producing a PDF file from LaTeX. -} -module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where -import Text.Pandoc.TH ( binaryContentsOf ) -import Data.Maybe ( fromJust ) -import Data.List ( partition, intersperse ) +module Text.Pandoc.PDF ( saveLaTeXAsPDF ) where +import Data.List ( isInfixOf ) import System.Directory -import System.FilePath ( (</>), takeDirectory, takeFileName, splitDirectories ) +import System.FilePath ( (</>), (<.>), takeBaseName ) import System.Process ( runProcess, waitForProcess ) import System.Exit -import Text.XML.Light -import Text.XML.Light.Cursor +import System.Environment ( getEnvironment ) import Text.Pandoc.Shared ( withTempDir ) -import Network.URI ( isURI ) -import qualified Data.ByteString as B ( writeFile, pack ) -import Data.ByteString.Internal ( c2w ) -import Prelude hiding ( writeFile, readFile ) -import System.IO ( stderr ) +import Prelude hiding ( writeFile, readFile, putStrLn ) +import System.IO ( stderr, openFile, IOMode (..) ) #ifdef _UTF8STRING import System.IO.UTF8 #else import Text.Pandoc.UTF8 #endif --- | Produce an ODT file from OpenDocument XML. -saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced. - -> FilePath -- ^ Relative directory of source file. - -> String -- ^ OpenDocument XML contents. - -> IO () -saveOpenDocumentAsODT destinationODTPath sourceDirRelative xml = do - let zipCmd = "zip" - -- check for zip in path: - zipPathMaybe <- findExecutable zipCmd - let zipPath = case zipPathMaybe of - Nothing -> error $ "The '" ++ zipCmd ++ - "' command, which is needed to build an ODT file, was not found.\n" ++ - "It can be obtained from http://www.info-zip.org/Zip.html\n" ++ - "Debian (and Debian-based) linux: apt-get install zip\n" ++ - "Windows: See http://gnuwin32.sourceforge.net/packages/zip.htm" +-- | Produce an PDF file from LaTeX. +saveLaTeXAsPDF :: FilePath -- ^ Pathname of PDF file to be produced. + -> FilePath -- ^ Relative directory of source file. + -> String -- ^ LaTeX document. + -> IO () +saveLaTeXAsPDF destinationPDFPath sourceDirRelative latex = do + -- check for pdflatex and bibtex in path: + latexPathMaybe <- findExecutable "pdflatex" + bibtexPathMaybe <- findExecutable "bibtex" + let latexPath = case latexPathMaybe of + Nothing -> error $ "The 'pdflatex' command, which is needed to build an PDF file, was not found." Just x -> x - withTempDir "pandoc-odt" $ \tempDir -> do - let tempODT = tempDir </> "reference.odt" - B.writeFile tempODT $ B.pack $ map c2w $(binaryContentsOf $ "odt-styles" </> "reference.odt") - xml' <- handlePictures tempODT sourceDirRelative xml - writeFile (tempDir </> "content.xml") xml' - ph <- runProcess zipPath ["-9", "-q", "-r", tempODT, "content.xml", "Pictures"] - (Just tempDir) Nothing Nothing Nothing (Just stderr) - ec <- waitForProcess ph -- requires compilation with -threaded - case ec of - ExitSuccess -> copyFile tempODT destinationODTPath - _ -> error "Error creating ODT." >> exitWith ec - --- | Find <draw:image ... /> elements and copy the file (xlink:href attribute) into Pictures/ in --- the zip file. If filename is a URL, attempt to download it. Modify xlink:href attributes --- to point to the new locations in Pictures/. Return modified XML. -handlePictures :: FilePath -- ^ Path of ODT file in temp directory - -> FilePath -- ^ Directory (relative) containing source file - -> String -- ^ OpenDocument XML string - -> IO String -- ^ Modified XML -handlePictures tempODT sourceDirRelative xml = do - let parsed = case parseXMLDoc xml of - Nothing -> error "Could not parse OpenDocument XML." - Just x -> x - let cursor = case (fromForest $ elContent parsed) of - Nothing -> error "ODT appears empty" - Just x -> x - cursor' <- scanPictures tempODT sourceDirRelative cursor - let modified = parsed { elContent = toForest $ root cursor' } - return $ showTopElement modified - -scanPictures :: FilePath -> FilePath -> Cursor -> IO Cursor -scanPictures tempODT sourceDirRelative cursor = do - cursor' <- handleTree tempODT sourceDirRelative cursor - case right cursor' of - Just n -> scanPictures tempODT sourceDirRelative n - Nothing -> return cursor' - -handleTree :: FilePath -> FilePath -> Cursor -> IO Cursor -handleTree tempODT sourceDirRelative cursor = do - case firstChild cursor of - Nothing -> modifyContentM (handleContent tempODT sourceDirRelative) cursor - Just n -> scanPictures tempODT sourceDirRelative n >>= return . fromJust . parent + let bibtexPath = case bibtexPathMaybe of + Nothing -> error $ "The 'bibtex' command, which is needed to build an PDF file, was not found." + Just x -> x + sourceDirAbsolute <- getCurrentDirectory >>= return . (</> sourceDirRelative) >>= canonicalizePath + withTempDir "pandoc-pdf" $ \tempDir -> do + env <- getEnvironment + let env' = ("TEXINPUTS", ".:" ++ sourceDirAbsolute ++ ":") : env + let baseName = "input" + writeFile (tempDir </> baseName <.> "tex") latex + let runLatex = runProgram latexPath ["-interaction=nonstopmode", baseName] tempDir env' + let runBibtex = runProgram bibtexPath [baseName] tempDir env' + messages1 <- runLatex + let logPath = tempDir </> baseName <.> "log" + tocExists <- doesFileExist (tempDir </> baseName <.> "toc") + logContents <- readFile logPath + let undefinedRefs = "There were undefined references" `isInfixOf` logContents + let needsBibtex = "itation" `isInfixOf` logContents + if needsBibtex + then runBibtex >>= hPutStr stderr . unlines + else return () + if tocExists || undefinedRefs + then do + messages2 <- runLatex + logContents' <- readFile logPath + let stillUndefinedRefs = "There were undefined references" `isInfixOf` logContents' + if stillUndefinedRefs + then runLatex >>= hPutStr stderr . unlines + else hPutStr stderr $ unlines messages2 + else + hPutStr stderr $ unlines messages1 + let pdfPath = tempDir </> baseName <.> "pdf" + pdfExists <- doesFileExist pdfPath + if pdfExists + then copyFile pdfPath destinationPDFPath + else error "The PDF could not be created." --- | If content is an image link, handle it appropriately. --- Otherwise, handle children if any. -handleContent :: FilePath -> FilePath -> Content -> IO Content -handleContent tempODT sourceDirRelative content@(Elem el) = do - if qName (elName el) == "image" - then do - let (hrefs, rest) = partition (\a -> qName (attrKey a) == "href") $ elAttribs el - let href = case hrefs of - [] -> error $ "No href found in " ++ show el - [x] -> x - _ -> error $ "Multiple hrefs found in " ++ show el - if isURI $ attrVal href - then return content - else do -- treat as filename - let oldLoc = sourceDirRelative </> attrVal href - fileExists <- doesFileExist oldLoc - if fileExists - then do - let pref = take 230 $ concat $ intersperse "_" $ - splitDirectories $ takeDirectory $ attrVal href - let newLoc = "Pictures" </> pref ++ "_" ++ (takeFileName $ attrVal href) - let tempDir = takeDirectory tempODT - createDirectoryIfMissing False $ tempDir </> takeDirectory newLoc - copyFile oldLoc $ tempDir </> newLoc - let newAttrs = (href { attrVal = newLoc }) : rest - return $ Elem (el { elAttribs = newAttrs }) - else do - hPutStrLn stderr $ "Warning: Unable to find image at " ++ oldLoc ++ " - ignoring." - return content - else return content - -handleContent _ _ c = return c -- not Element +runProgram :: FilePath -- ^ pathname of executable + -> [String] -- ^ arguments + -> FilePath -- ^ working directory + -> [(String, String)] -- ^ environment + -> IO [String] +runProgram cmdPath arguments workingDir env = do + let runOutputPath = workingDir </> "output" <.> "tmp" + runOutput <- openFile runOutputPath WriteMode + ph <- runProcess cmdPath arguments (Just workingDir) (Just env) Nothing (Just runOutput) (Just runOutput) + ec <- waitForProcess ph -- requires compilation with -threaded + case ec of + ExitSuccess -> return [] + _ -> do + output <- readFile runOutputPath + if (takeBaseName cmdPath) == "bibtex" + then return $! lines output + else do + return $! + (if "`ucs.sty' not found" `isInfixOf` output + then ["Please install the 'unicode' package from CTAN:", + " http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/"] + else []) ++ + (if "`ulem.sty' not found" `isInfixOf` output + then ["Please install the 'ulem' package from CTAN:", + " http://www.ctan.org/tex-archive/macros/latex/contrib/misc/"] + else []) ++ + (if "`graphicx.sty' not found" `isInfixOf` output + then ["Please install the 'graphicx' package from CTAN:", + " http://www.ctan.org/tex-archive/macros/latex/required/graphics/"] + else []) ++ + (if "`fancyhdr.sty' not found" `isInfixOf` output + then ["Please install the 'fancyhdr' package from CTAN:", + " http://www.ctan.org/tex-archive/macros/latex/contrib/fancyhdr/"] + else []) ++ + (if "`array.sty' not found" `isInfixOf` output + then ["Please install the 'array' package from CTAN:", + " http://www.ctan.org/tex-archive/macros/latex/required/tools/"] + else []) ++ + (filter isUseful $ lines output) + where isUseful ln = take 1 ln == "!" || + take 2 ln == "l." || + "Error" `isInfixOf` ln || + "error" `isInfixOf` ln diff --git a/pandoc.cabal b/pandoc.cabal index 1c39a2208..1871275c8 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -165,6 +165,7 @@ Library Text.Pandoc.CharacterReferences, Text.Pandoc.Shared, Text.Pandoc.ODT, + Text.Pandoc.PDF, Text.Pandoc.ASCIIMathML, Text.Pandoc.DefaultHeaders, Text.Pandoc.Highlighting, |