aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Odt.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Odt.hs')
-rw-r--r--src/Text/Pandoc/Readers/Odt.hs113
1 files changed, 113 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
new file mode 100644
index 000000000..ac22f2c09
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -0,0 +1,113 @@
+{-# LANGUAGE PatternGuards #-}
+
+{-
+Copyright (C) 2015 Martin Linnemann <theCodingMarlin@googlemail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Reader.Odt
+ Copyright : Copyright (C) 2015 Martin Linnemann
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Martin Linnemann <theCodingMarlin@googlemail.com>
+ Stability : alpha
+ Portability : portable
+
+Entry point to the odt reader.
+-}
+
+module Text.Pandoc.Readers.Odt ( readOdt ) where
+
+import Codec.Archive.Zip
+import qualified Text.XML.Light as XML
+
+import qualified Data.ByteString.Lazy as B
+
+import System.FilePath
+
+import Control.Monad.Except (throwError)
+
+import Text.Pandoc.Class (PandocMonad)
+import qualified Text.Pandoc.Class as P
+import Text.Pandoc.Definition
+import Text.Pandoc.Error
+import Text.Pandoc.Options
+import Text.Pandoc.MediaBag
+import qualified Text.Pandoc.UTF8 as UTF8
+
+import Text.Pandoc.Readers.Odt.ContentReader
+import Text.Pandoc.Readers.Odt.StyleReader
+
+import Text.Pandoc.Readers.Odt.Generic.XMLConverter
+import Text.Pandoc.Readers.Odt.Generic.Fallible
+import Text.Pandoc.Shared (filteredFilesFromArchive)
+
+readOdt :: PandocMonad m
+ => ReaderOptions
+ -> B.ByteString
+ -> m Pandoc
+readOdt opts bytes = case readOdt' opts bytes of
+ Right (doc, mb) -> do
+ P.setMediaBag mb
+ return doc
+ Left e -> throwError e
+
+--
+readOdt' :: ReaderOptions
+ -> B.ByteString
+ -> Either PandocError (Pandoc, MediaBag)
+readOdt' _ bytes = bytesToOdt bytes-- of
+-- Right (pandoc, mediaBag) -> Right (pandoc , mediaBag)
+-- Left err -> Left err
+
+--
+bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag)
+bytesToOdt bytes = case toArchiveOrFail bytes of
+ Right archive -> archiveToOdt archive
+ Left _ -> Left $ PandocParseError "Couldn't parse odt file."
+
+--
+archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
+archiveToOdt archive
+ | Just contentEntry <- findEntryByPath "content.xml" archive
+ , Just stylesEntry <- findEntryByPath "styles.xml" archive
+ , Just contentElem <- entryToXmlElem contentEntry
+ , Just stylesElem <- entryToXmlElem stylesEntry
+ , Right styles <- chooseMax (readStylesAt stylesElem )
+ (readStylesAt contentElem)
+ , media <- filteredFilesFromArchive archive filePathIsOdtMedia
+ , startState <- readerState styles media
+ , Right pandocWithMedia <- runConverter' read_body
+ startState
+ contentElem
+
+ = Right pandocWithMedia
+
+ | otherwise
+ -- Not very detailed, but I don't think more information would be helpful
+ = Left $ PandocParseError "Couldn't parse odt file."
+ where
+ filePathIsOdtMedia :: FilePath -> Bool
+ filePathIsOdtMedia fp =
+ let (dir, _) = splitFileName fp
+ in
+ (dir == "Pictures/")
+
+
+--
+entryToXmlElem :: Entry -> Maybe XML.Element
+entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry