diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2008-07-31 23:16:02 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2008-07-31 23:16:02 +0000 |
commit | d072ad4b66b5cdf5a8f811ae1f78460e34270d58 (patch) | |
tree | 6f77dde12c4d5824434e0961f71c9f38f19be4cd | |
parent | 504a61a97b9dbd0b55b2b06fd2c1f547d71a1fa1 (diff) | |
download | pandoc-d072ad4b66b5cdf5a8f811ae1f78460e34270d58.tar.gz |
Added 'odt' output option to pandoc:
Not a writer, but a module that inserts the output of the OpenDocument
writer into an ODT archive. This replaces markdown2odt.
+ Added odt output option to Main.hs.
+ Added default for .odt output file.
+ Changed defaults so that .xml and .sgml aren't automatically DocBook.
+ Added odt writer to Text.Pandoc exports.
+ Added Text.Pandoc.ODT and included in pandoc.cabal.
+ Added reference.odt as data-file in pandoc.cabal.
+ Handle picture links in OpenDocument files using xml library.
+ Removed markdown2odt and references from Makefile, README, man.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1345 788f1e2b-df1e-0410-8736-df70ead52e1b
-rw-r--r-- | Main.hs | 46 | ||||
-rw-r--r-- | Makefile | 7 | ||||
-rw-r--r-- | README | 26 | ||||
-rw-r--r-- | Text/Pandoc/ODT.hs | 141 | ||||
-rw-r--r-- | Text/Pandoc/Shared.hs | 26 | ||||
-rw-r--r-- | man/man1/markdown2odt.1.md | 52 | ||||
-rw-r--r-- | pandoc.cabal | 15 | ||||
-rw-r--r-- | wrappers/markdown2odt.in | 41 |
8 files changed, 210 insertions, 144 deletions
@@ -31,11 +31,12 @@ writers. module Main where import Text.Pandoc import Text.Pandoc.UTF8 +import Text.Pandoc.ODT import Text.Pandoc.Shared ( joinWithSep, HTMLMathMethod (..) ) import Text.Pandoc.Highlighting ( languages ) import System.Environment ( getArgs, getProgName, getEnvironment ) import System.Exit ( exitWith, ExitCode (..) ) -import System.FilePath ( takeExtension ) +import System.FilePath ( takeExtension, takeDirectory ) import System.Console.GetOpt import System.IO import Data.Maybe ( fromMaybe ) @@ -82,6 +83,7 @@ writers = [("native" , (writeDoc, "")) ,("s5" , (writeS5String, defaultS5Header)) ,("docbook" , (writeDocbook, defaultDocbookHeader)) ,("opendocument" , (writeOpenDocument, defaultOpenDocumentHeader)) + ,("odt" , (writeOpenDocument, defaultOpenDocumentHeader)) ,("latex" , (writeLaTeX, defaultLaTeXHeader)) ,("context" , (writeConTeXt, defaultConTeXtHeader)) ,("texinfo" , (writeTexinfo, "")) @@ -92,6 +94,10 @@ writers = [("native" , (writeDoc, "")) ,("rtf" , (writeRTF, defaultRTFHeader)) ] +isNonTextOutput :: String -> Bool +isNonTextOutput "odt" = True +isNonTextOutput _ = False + -- | Writer for Pandoc native format. writeDoc :: WriterOptions -> Pandoc -> String writeDoc _ = prettyPandoc @@ -392,8 +398,7 @@ defaultWriterName x = ".texi" -> "texinfo" ".texinfo" -> "texinfo" ".db" -> "docbook" - ".xml" -> "docbook" - ".sgml" -> "docbook" + ".odt" -> "odt" ['.',y] | y `elem` ['1'..'9'] -> "man" _ -> "html" @@ -478,10 +483,6 @@ main = do Just (w,h) -> return (w, h) Nothing -> error ("Unknown writer: " ++ writerName') - output <- if (outputFile == "-") - then return stdout - else openFile outputFile WriteMode - environment <- getEnvironment let columns = case lookup "COLUMNS" environment of Just cols -> read cols @@ -501,11 +502,13 @@ main = do tabFilter spsToNextStop (x:xs) = x:(tabFilter (spsToNextStop - 1) xs) + let standalone' = (standalone && not strict) || writerName' == "odt" + let startParserState = defaultParserState { stateParseRaw = parseRaw, stateTabStop = tabStop, stateSanitizeHTML = sanitize, - stateStandalone = standalone && (not strict), + stateStandalone = standalone', stateSmart = smart || writerName' `elem` ["latex", "context"], stateColumns = columns, @@ -519,16 +522,15 @@ main = do let header = (if (customHeader == "DEFAULT") then defaultHeader else customHeader) ++ csslink ++ includeHeader - let writerOptions = WriterOptions { writerStandalone = standalone && - (not strict), + let writerOptions = WriterOptions { writerStandalone = standalone', writerHeader = header, writerTitlePrefix = titlePrefix, writerTabStop = tabStop, writerTableOfContents = toc && (not strict) && - writerName/="s5", + writerName' /= "s5", writerHTMLMathMethod = mathMethod, - writerS5 = (writerName=="s5"), + writerS5 = (writerName' == "s5"), writerIgnoreNotes = False, writerIncremental = incremental, writerNumberSections = numberSections, @@ -538,11 +540,25 @@ main = do writerReferenceLinks = referenceLinks, writerWrapText = wrap } - (readSources sources) >>= (hPutStrLn output . toUTF8 . + let writeOutput = if writerName' == "odt" + then if outputFile == "-" + then \_ -> do + hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ + " output to stdout.\n" ++ + "Specify an output file using the -o option.") + exitWith $ ExitFailure 5 + else let sourceDirRelative = if null sources + then "" + else takeDirectory (head sources) + in saveOpenDocumentAsODT outputFile sourceDirRelative + else if outputFile == "-" + then putStrLn + else writeFile outputFile . (++ "\n") + + (readSources sources) >>= writeOutput . toUTF8 . (writer writerOptions) . (reader startParserState) . tabFilter tabStop . - fromUTF8 . (joinWithSep "\n")) >> - hClose output + fromUTF8 . (joinWithSep "\n") where readSources [] = mapM readSource ["-"] @@ -24,7 +24,7 @@ EXECSBASE := $(shell sed -ne 's/^[Ee]xecutable:\{0,1\}[[:space:]]*//p' $(CABAL)) #------------------------------------------------------------------------------- # Install targets #------------------------------------------------------------------------------- -WRAPPERS := html2markdown markdown2pdf hsmarkdown markdown2odt +WRAPPERS := html2markdown markdown2pdf hsmarkdown # Add .exe extensions if we're running Windows/Cygwin. EXTENSION := $(shell uname | tr '[:upper:]' '[:lower:]' | \ sed -ne 's/^cygwin.*$$/\.exe/p') @@ -114,11 +114,6 @@ $(ODTREF): $(addprefix $(ODTSTYLES)/, layout-cache meta.xml styles.xml content.x cd $(ODTSTYLES) ; \ zip -9 -r $(notdir $@) * -x $(notdir $@) -ODTREFSH=$(SRCDIR)/wrappers/odtref.sh -cleanup_files+=$(ODTREFSH) -$(ODTREFSH): $(ODTREF) - echo "REFERENCEODT='$(PREFIX)/share/$(DATADIR)/$(notdir $(ODTREF))'" > $@ - .PHONY: wrappers wrappers: $(WRAPPERS) cleanup_files+=$(WRAPPERS) @@ -119,7 +119,7 @@ then convert the output back to the local encoding. Shell scripts ============= -Four shell scripts, `markdown2pdf`, `markdown2odt`, `html2markdown`, and +Three shell scripts, `markdown2pdf`, `html2markdown`, and `hsmarkdown`, are included in the standard Pandoc installation. (They are not included in the Windows binary package, as they require a POSIX shell, but they may be used in Windows under Cygwin.) @@ -149,27 +149,7 @@ shell, but they may be used in Windows under Cygwin.) included in your LaTeX distribution, you can get them from [CTAN]. -2. `markdown2odt` produces an ODT file from markdown-formatted - text, using `pandoc` and `pdflatex`. (ODT is "OpenDocument - Text," the default format for the OpenOffice.org Writer.) - The default behavior of `markdown2odt` is to create a file with the - same base name as the first argument and the extension `odt`; thus, - for example, - - markdown2odt sample.txt endnotes.txt - - will produce `sample.odt`. (If `sample.odt` exists already, - it will be backed up before being overwritten.) An output file - name can be specified explicitly using the `-o` option: - - markdown2odt -o book.odt chap1 chap2 - - If no input file is specified, input will be taken from STDIN. - All of `pandoc`'s options will work with `markdown2odt` as well. - - `markdown2odt` requires `zip`, which must be in the path. - -3. `html2markdown` grabs a web page from a file or URL and converts +2. `html2markdown` grabs a web page from a file or URL and converts it to markdown-formatted text, using `tidy` and `pandoc`. All of `pandoc`'s options will work with `html2markdown` as well. @@ -198,7 +178,7 @@ shell, but they may be used in Windows under Cygwin.) It uses [`iconv`] for character encoding conversions; if `iconv` is absent, it will still work, but it will treat everything as UTF-8. -4. `hsmarkdown` is designed to be used as a drop-in replacement for +3. `hsmarkdown` is designed to be used as a drop-in replacement for `Markdown.pl`. It forces `pandoc` to convert from markdown to HTML, and to use the `--strict` flag for maximal compliance with official markdown syntax. (All of Pandoc's syntax extensions and diff --git a/Text/Pandoc/ODT.hs b/Text/Pandoc/ODT.hs new file mode 100644 index 000000000..f388515fb --- /dev/null +++ b/Text/Pandoc/ODT.hs @@ -0,0 +1,141 @@ +{- +Copyright (C) 2008 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.ODT + Copyright : Copyright (C) 2006-7 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Functions for producing an ODT file from OpenDocument XML. +-} +module Text.Pandoc.ODT ( saveOpenDocumentAsODT ) where +import Data.Maybe ( fromJust ) +import Data.List ( partition, intersperse ) +import Prelude hiding ( writeFile, readFile ) +import System.IO.UTF8 +import System.IO ( stderr ) +import System.Directory +import System.FilePath ( (</>), takeDirectory, takeFileName, splitDirectories ) +import System.Process ( runCommand, waitForProcess ) +import System.Exit +import Text.XML.Light +import Text.XML.Light.Cursor +import Text.Pandoc.Shared ( withTempDir ) +import Network.URI ( isURI ) +import Paths_pandoc + +-- | Produce an ODT file from OpenDocument XML. +saveOpenDocumentAsODT :: FilePath -- ^ Pathname of ODT file to be produced. + -> FilePath -- ^ Relative directory of source file. + -> String -- ^ OpenDocument XML contents. + -> IO () +saveOpenDocumentAsODT destinationODTPath sourceDirRelative xml = do + let zipCmd = "zip" + -- check for zip in path: + maybeZipPath <- findExecutable zipCmd + let zipPath = case maybeZipPath of + Just p -> p + Nothing -> error $ "The '" ++ zipCmd ++ "' command, which is needed to build an ODT file, " ++ + "was not found.\n" ++ + "It can be obtained from http://www.info-zip.org/Zip.html\n" ++ + "Debian (and Debian-based) linux: apt-get install zip\n" ++ + "Windows: See http://gnuwin32.sourceforge.net/packages/zip.htm" + referenceODTPath <- getDataFileName "reference.odt" + withTempDir "pandoc-odt" $ \tempDir -> do + let tempODT = tempDir </> "reference.odt" + copyFile referenceODTPath tempODT + createDirectory $ tempDir </> "Pictures" + xml' <- handlePictures tempODT sourceDirRelative xml + writeFile (tempDir </> "content.xml") xml' + oldDir <- getCurrentDirectory + setCurrentDirectory tempDir + let zipCmdLine = zipPath ++ " -9 -q -r " ++ tempODT ++ " " ++ "content.xml Pictures" + ec <- runCommand zipCmdLine >>= waitForProcess -- this requires compilation with -threaded + setCurrentDirectory oldDir + case ec of + ExitSuccess -> copyFile tempODT destinationODTPath + _ -> error "Error creating ODT." >> exitWith ec + + +-- | Find <draw:image ... /> elements and copy the file (xlink:href attribute) into Pictures/ in +-- the zip file. If filename is a URL, attempt to download it. Modify xlink:href attributes +-- to point to the new locations in Pictures/. Return modified XML. +handlePictures :: FilePath -- ^ Path of ODT file in temp directory + -> FilePath -- ^ Directory (relative) containing source file + -> String -- ^ OpenDocument XML string + -> IO String -- ^ Modified XML +handlePictures tempODT sourceDirRelative xml = do + let parsed = case parseXMLDoc xml of + Nothing -> error "Could not parse OpenDocument XML." + Just x -> x + let cursor = case (fromForest $ elContent parsed) of + Nothing -> error "ODT appears empty" + Just x -> x + cursor' <- scanPictures tempODT sourceDirRelative cursor + let modified = parsed { elContent = toForest $ root cursor' } + putStrLn $ showTopElement modified + return $ showTopElement modified + +scanPictures :: FilePath -> FilePath -> Cursor -> IO Cursor +scanPictures tempODT sourceDirRelative cursor = do + cursor' <- handleTree tempODT sourceDirRelative cursor + case right cursor' of + Just n -> scanPictures tempODT sourceDirRelative n + Nothing -> return cursor' + +handleTree :: FilePath -> FilePath -> Cursor -> IO Cursor +handleTree tempODT sourceDirRelative cursor = do + case firstChild cursor of + Nothing -> modifyContentM (handleContent tempODT sourceDirRelative) cursor + Just n -> scanPictures tempODT sourceDirRelative n >>= return . fromJust . parent + +-- | If content is an image link, handle it appropriately. +-- Otherwise, handle children if any. +handleContent :: FilePath -> FilePath -> Content -> IO Content +handleContent tempODT sourceDirRelative content@(Elem el) = do + if qName (elName el) == "image" + then do + let (hrefs, rest) = partition (\a -> qName (attrKey a) == "href") $ elAttribs el + let href = case hrefs of + [] -> error $ "No href found in " ++ show el + [x] -> x + _ -> error $ "Multiple hrefs found in " ++ show el + if isURI $ attrVal href + then return content + else do -- treat as filename + let oldLoc = sourceDirRelative </> attrVal href + fileExists <- doesFileExist oldLoc + if fileExists + then do + let pref = take 230 $ concat $ intersperse "_" $ + splitDirectories $ takeDirectory $ attrVal href + let newLoc = "Pictures" </> pref ++ "_" ++ (takeFileName $ attrVal href) + let tempDir = takeDirectory tempODT + copyFile oldLoc $ tempDir </> newLoc + let newAttrs = (href { attrVal = newLoc }) : rest + return $ Elem (el { elAttribs = newAttrs }) + else do + hPutStrLn stderr $ "Warning: Unable to find image at " ++ oldLoc ++ " - ignoring." + return content + else return content + +handleContent _ _ c = return c -- not Element diff --git a/Text/Pandoc/Shared.hs b/Text/Pandoc/Shared.hs index 05289d6a6..3c202db0f 100644 --- a/Text/Pandoc/Shared.hs +++ b/Text/Pandoc/Shared.hs @@ -98,7 +98,9 @@ module Text.Pandoc.Shared ( -- * Writer options HTMLMathMethod (..), WriterOptions (..), - defaultWriterOptions + defaultWriterOptions, + -- * File handling + withTempDir ) where import Text.Pandoc.Definition @@ -110,6 +112,9 @@ import Data.Char ( toLower, toUpper, ord, isLower, isUpper ) import Data.List ( find, isPrefixOf ) import Control.Monad ( join ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) +import System.FilePath ( (</>), (<.>) ) +import System.IO.Error ( catch, ioError, isAlreadyExistsError ) +import System.Directory -- -- List processing @@ -900,3 +905,22 @@ defaultWriterOptions = , writerReferenceLinks = False , writerWrapText = True } + +-- | Perform a function in a temporary directory and clean up. +withTempDir :: FilePath -> (FilePath -> IO a) -> IO a +withTempDir baseName func = do + tempDir <- createTempDir 0 baseName + result <- catch (func tempDir) $ \e -> removeDirectoryRecursive tempDir >> ioError e + removeDirectoryRecursive tempDir + return result + +-- | Create a temporary directory with a unique name. +createTempDir :: Integer -> FilePath -> IO FilePath +createTempDir num baseName = do + sysTempDir <- getTemporaryDirectory + let dirName = sysTempDir </> baseName <.> show num + catch (createDirectory dirName >> return dirName) $ + \e -> if isAlreadyExistsError e + then createTempDir (num + 1) baseName + else ioError e + diff --git a/man/man1/markdown2odt.1.md b/man/man1/markdown2odt.1.md deleted file mode 100644 index 9325237f4..000000000 --- a/man/man1/markdown2odt.1.md +++ /dev/null @@ -1,52 +0,0 @@ -% MARKDOWN2ODT(1) Pandoc User Manuals -% John MacFarlane and Recai Oktas -% March 14, 2008 - -# NAME - -markdown2odt - converts markdown-formatted text to ODT - -# SYNOPSIS - -markdown2odt [*options*] [*input-file*]... - -# DESCRIPTION - -`markdown2odt` converts *input-file* (or text from standard -input) from markdown-formatted plain text to ODT (OpenDocument -Text) format. If no output filename is specified (using the `-o` -option), the name of the output file is derived from the input file; -thus, for example, if the input file is *hello.txt*, the output file -will be *hello.odt*. If the input is read from STDIN and no output -filename is specified, the output file will be named *stdin.odt*. If -multiple input files are specified, they will be concatenated before -conversion, and the name of the output file will be derived from the -first input file. - -Input is assumed to be in the UTF-8 character encoding. If your -local character encoding is not UTF-8, you should pipe input -through `iconv`: - - iconv -t utf-8 input.txt | markdown2odt - -# OPTIONS - -`markdown2odt` is a wrapper around `pandoc`, so all of -`pandoc`'s options can be used with `markdown2odt` as well. -See `pandoc`(1) for a complete list. -The following options are most relevant: - --o *FILE*, \--output=*FILE* -: Write output to *FILE*. - -\--strict -: Use strict markdown syntax, with no extensions or variants. - --S, \--smart -: Use smart quotes, dashes, and ellipses. (This option is significant - only when the input format is `markdown`. It is selected automatically - when the output format is `latex` or `context`.) - -# SEE ALSO - -`pandoc`(1) diff --git a/pandoc.cabal b/pandoc.cabal index 679806acb..d0d788499 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -57,6 +57,7 @@ Extra-Source-Files: README, INSTALL, COPYRIGHT, COPYING, Extra-Tmp-Files: Text/Pandoc/ASCIIMathML.hs, Text/Pandoc/DefaultHeaders.hs, Text/Pandoc/Writers/S5.hs +Data-Files: odt-styles/reference.odt Flag splitBase Description: Choose the new, smaller, split-up base package. Default: True @@ -72,7 +73,8 @@ Library if flag(highlighting) Build-depends: highlighting-kate cpp-options: -DHIGHLIGHTING - Build-Depends: parsec < 3, xhtml, mtl, network, filepath + Build-Depends: parsec < 3, xhtml, mtl, network, filepath, process, directory, xml, + bytestring, binary Hs-Source-Dirs: . Exposed-Modules: Text.Pandoc, Text.Pandoc.Blocks, @@ -80,6 +82,7 @@ Library Text.Pandoc.CharacterReferences, Text.Pandoc.Shared, Text.Pandoc.UTF8, + Text.Pandoc.ODT, Text.Pandoc.ASCIIMathML, Text.Pandoc.DefaultHeaders, Text.Pandoc.Highlighting, @@ -101,11 +104,11 @@ Library Text.Pandoc.Writers.RTF, Text.Pandoc.Writers.S5 Other-Modules: Text.Pandoc.XML - Ghc-Options: -O2 -Wall + Ghc-Options: -O2 -Wall -threaded Ghc-Prof-Options: -auto-all Executable pandoc - Hs-Source-Dirs: . - Main-Is: Main.hs - Ghc-Options: -O2 -Wall - Ghc-Prof-Options: -auto-all + Hs-Source-Dirs: . + Main-Is: Main.hs + Ghc-Options: -O2 -Wall -threaded + Ghc-Prof-Options: -auto-all diff --git a/wrappers/markdown2odt.in b/wrappers/markdown2odt.in deleted file mode 100644 index e5b3f0212..000000000 --- a/wrappers/markdown2odt.in +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/sh -e - -REQUIRED="zip" -SYNOPSIS="converts markdown-formatted text to ODT." - -### odtref.sh - -### common.sh - -### tempdir.sh - -if [ "$OUTPUT" = "-" ]; then - firstinfile="$(echo $ARGS | sed -ne '1p')" - firstinfilebase="${firstinfile%.*}" - destname="${firstinfilebase:-stdin}.odt" -else - destname="$OUTPUT" -fi - -( - cp $REFERENCEODT $THIS_TEMPDIR/new.odt - pandoc -s -r markdown -w opendocument "$@" -o $THIS_TEMPDIR/content.xml - zip -9 -j $THIS_TEMPDIR/new.odt $THIS_TEMPDIR/content.xml -) || exit $? - -is_target_exists= -if [ -f "$destname" ]; then - is_target_exists=1 - mv "$destname" "$destname~" -fi - -mv -f $THIS_TEMPDIR/new.odt "$destname" - -errn "Created $destname" -[ -z "$is_target_exists" ] || { - errn " (previous file has been backed up as $destname~)" -} - -err . - - |