diff options
author | Hubert Plociniczak <hubert.plociniczak@gmail.com> | 2016-10-12 17:42:30 +0200 |
---|---|---|
committer | Hubert Plociniczak <hubert.plociniczak@gmail.com> | 2016-10-12 17:50:35 +0200 |
commit | c924611de526601f64154bef83035f75e8f4c334 (patch) | |
tree | f665c276c4683f018e06357b0efe34ff43450c6b /src/Text/Pandoc/Readers/Odt | |
parent | cbeb72d06b4eb3718479eba5257a33a385f642fe (diff) | |
download | pandoc-c924611de526601f64154bef83035f75e8f4c334.tar.gz |
Basic support for images in ODT documents
Highly influenced by the docx support, refactored
some code to avoid DRY.
Diffstat (limited to 'src/Text/Pandoc/Readers/Odt')
-rw-r--r-- | src/Text/Pandoc/Readers/Odt/ContentReader.hs | 89 |
1 files changed, 83 insertions, 6 deletions
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index 8c475eefc..d61707976 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -42,6 +42,7 @@ module Text.Pandoc.Readers.Odt.ContentReader import Control.Arrow import Control.Applicative hiding ( liftA, liftA2, liftA3 ) +import qualified Data.ByteString.Lazy as B import qualified Data.Map as M import Data.List ( find ) import Data.Maybe @@ -50,6 +51,7 @@ import qualified Text.XML.Light as XML import Text.Pandoc.Definition import Text.Pandoc.Builder +import Text.Pandoc.MediaBag (insertMedia, MediaBag) import Text.Pandoc.Shared import Text.Pandoc.Readers.Odt.Base @@ -68,6 +70,7 @@ import qualified Data.Set as Set -------------------------------------------------------------------------------- type Anchor = String +type Media = [(FilePath, B.ByteString)] data ReaderState = ReaderState { -- | A collection of styles read somewhere else. @@ -87,14 +90,17 @@ data ReaderState -- | A map from internal anchor names to "pretty" ones. -- The mapping is a purely cosmetic one. , bookmarkAnchors :: M.Map Anchor Anchor - + -- | A map of files / binary data from the archive + , envMedia :: Media + -- | Hold binary resources used in the document + , odtMediaBag :: MediaBag -- , sequences -- , trackedChangeIDs } deriving ( Show ) -readerState :: Styles -> ReaderState -readerState styles = ReaderState styles [] 0 Nothing M.empty +readerState :: Styles -> Media -> ReaderState +readerState styles media = ReaderState styles [] 0 Nothing M.empty media mempty -- pushStyle' :: Style -> ReaderState -> ReaderState @@ -134,6 +140,16 @@ putPrettyAnchor ugly pretty state@ReaderState{..} usedAnchors :: ReaderState -> [Anchor] usedAnchors ReaderState{..} = M.elems bookmarkAnchors +getMediaBag :: ReaderState -> MediaBag +getMediaBag ReaderState{..} = odtMediaBag + +getMediaEnv :: ReaderState -> Media +getMediaEnv ReaderState{..} = envMedia + +insertMedia' :: (FilePath, B.ByteString) -> ReaderState -> ReaderState +insertMedia' (fp, bs) state@ReaderState{..} + = state { odtMediaBag = insertMedia fp Nothing bs odtMediaBag } + -------------------------------------------------------------------------------- -- Reader type and associated tools -------------------------------------------------------------------------------- @@ -190,6 +206,22 @@ popStyle = keepingTheValue ( getCurrentListLevel :: OdtReaderSafe _x ListLevel getCurrentListLevel = getExtraState >>^ currentListLevel +-- +updateMediaWithResource :: OdtReaderSafe (FilePath, B.ByteString) (FilePath, B.ByteString) +updateMediaWithResource = keepingTheValue ( + (keepingTheValue getExtraState + >>% insertMedia' + ) + >>> setExtraState + ) + >>^ fst + +lookupResource :: OdtReaderSafe String (FilePath, B.ByteString) +lookupResource = proc target -> do + state <- getExtraState -< () + case lookup target (getMediaEnv state) of + Just bs -> returnV (target, bs) -<< () + Nothing -> returnV ("", B.empty) -< () type AnchorPrefix = String @@ -559,6 +591,7 @@ read_paragraph = matchingElement NsText "p" , read_reference_start , read_bookmark_ref , read_reference_ref + , read_frame ] read_plain_text @@ -583,6 +616,7 @@ read_header = matchingElement NsText "h" , read_reference_start , read_bookmark_ref , read_reference_ref + , read_frame ] read_plain_text ) -< blocks anchor <- getHeaderAnchor -< children @@ -688,6 +722,46 @@ read_table_cell = matchingElement NsTable "table-cell" ] ---------------------- +-- Images +---------------------- + +-- +read_frame :: InlineMatcher +read_frame = matchingElement NsDraw "frame" + $ proc blocks -> do + w <- ( findAttr' NsSVG "width" ) -< () + h <- ( findAttr' NsSVG "height" ) -< () + attr <- arr (uncurry image_attributes) -< (w, h) + titleNodes <- ( matchChildContent' [ read_frame_title ] ) -< blocks + title <- arr inlineListToIdentifier -< (toList titleNodes) + src <- matchChildContent' [ read_image_src ] -< blocks + resource <- lookupResource -< src + _ <- updateMediaWithResource -< resource + arr (uncurry4 imageWith ) -< (attr, src, title, mempty) + +image_attributes :: Maybe String -> Maybe String -> Attr +image_attributes x y = + ( "", [], (dim "width" x) ++ (dim "height" y)) + where + dim _ (Just "") = [] + dim name (Just v) = [(name, v)] + dim _ Nothing = [] + +read_image_src :: (Namespace, ElementName, OdtReader Anchor Anchor) +read_image_src = matchingElement NsDraw "image" + $ proc _ -> do + imgSrc <- findAttr NsXLink "href" -< () + case imgSrc of + Right src -> returnV src -<< () + Left _ -> returnV "" -< () + +read_frame_title :: InlineMatcher +read_frame_title = matchingElement NsSVG "title" + $ (matchChildContent [] read_plain_text) + + + +---------------------- -- Internal links ---------------------- @@ -783,8 +857,11 @@ read_text = matchChildContent' [ read_header ] >>^ doc -read_body :: OdtReader _x Pandoc +read_body :: OdtReader _x (Pandoc, MediaBag) read_body = executeIn NsOffice "body" $ executeIn NsOffice "text" - $ liftAsSuccess read_text - + $ liftAsSuccess + $ proc inlines -> do + txt <- read_text -< inlines + state <- getExtraState -< () + returnA -< (txt, getMediaBag state) |