aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Odt
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Odt')
-rw-r--r--src/Text/Pandoc/Readers/Odt/ContentReader.hs123
-rw-r--r--src/Text/Pandoc/Readers/Odt/StyleReader.hs32
2 files changed, 133 insertions, 22 deletions
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index 8c475eefc..42f018157 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -42,6 +42,7 @@ module Text.Pandoc.Readers.Odt.ContentReader
import Control.Arrow
import Control.Applicative hiding ( liftA, liftA2, liftA3 )
+import qualified Data.ByteString.Lazy as B
import qualified Data.Map as M
import Data.List ( find )
import Data.Maybe
@@ -50,6 +51,7 @@ import qualified Text.XML.Light as XML
import Text.Pandoc.Definition
import Text.Pandoc.Builder
+import Text.Pandoc.MediaBag (insertMedia, MediaBag)
import Text.Pandoc.Shared
import Text.Pandoc.Readers.Odt.Base
@@ -68,6 +70,7 @@ import qualified Data.Set as Set
--------------------------------------------------------------------------------
type Anchor = String
+type Media = [(FilePath, B.ByteString)]
data ReaderState
= ReaderState { -- | A collection of styles read somewhere else.
@@ -87,14 +90,17 @@ data ReaderState
-- | A map from internal anchor names to "pretty" ones.
-- The mapping is a purely cosmetic one.
, bookmarkAnchors :: M.Map Anchor Anchor
-
+ -- | A map of files / binary data from the archive
+ , envMedia :: Media
+ -- | Hold binary resources used in the document
+ , odtMediaBag :: MediaBag
-- , sequences
-- , trackedChangeIDs
}
deriving ( Show )
-readerState :: Styles -> ReaderState
-readerState styles = ReaderState styles [] 0 Nothing M.empty
+readerState :: Styles -> Media -> ReaderState
+readerState styles media = ReaderState styles [] 0 Nothing M.empty media mempty
--
pushStyle' :: Style -> ReaderState -> ReaderState
@@ -134,6 +140,16 @@ putPrettyAnchor ugly pretty state@ReaderState{..}
usedAnchors :: ReaderState -> [Anchor]
usedAnchors ReaderState{..} = M.elems bookmarkAnchors
+getMediaBag :: ReaderState -> MediaBag
+getMediaBag ReaderState{..} = odtMediaBag
+
+getMediaEnv :: ReaderState -> Media
+getMediaEnv ReaderState{..} = envMedia
+
+insertMedia' :: (FilePath, B.ByteString) -> ReaderState -> ReaderState
+insertMedia' (fp, bs) state@ReaderState{..}
+ = state { odtMediaBag = insertMedia fp Nothing bs odtMediaBag }
+
--------------------------------------------------------------------------------
-- Reader type and associated tools
--------------------------------------------------------------------------------
@@ -190,6 +206,22 @@ popStyle = keepingTheValue (
getCurrentListLevel :: OdtReaderSafe _x ListLevel
getCurrentListLevel = getExtraState >>^ currentListLevel
+--
+updateMediaWithResource :: OdtReaderSafe (FilePath, B.ByteString) (FilePath, B.ByteString)
+updateMediaWithResource = keepingTheValue (
+ (keepingTheValue getExtraState
+ >>% insertMedia'
+ )
+ >>> setExtraState
+ )
+ >>^ fst
+
+lookupResource :: OdtReaderSafe String (FilePath, B.ByteString)
+lookupResource = proc target -> do
+ state <- getExtraState -< ()
+ case lookup target (getMediaEnv state) of
+ Just bs -> returnV (target, bs) -<< ()
+ Nothing -> returnV ("", B.empty) -< ()
type AnchorPrefix = String
@@ -386,7 +418,7 @@ getListConstructor ListLevelStyle{..} =
LltNumbered -> let listNumberStyle = toListNumberStyle listItemFormat
listNumberDelim = toListNumberDelim listItemPrefix
listItemSuffix
- in orderedListWith (1, listNumberStyle, listNumberDelim)
+ in orderedListWith (listItemStart, listNumberStyle, listNumberDelim)
where
toListNumberStyle LinfNone = DefaultStyle
toListNumberStyle LinfNumber = Decimal
@@ -511,6 +543,10 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover
extractText (XML.Text cData) = succeedWith (XML.cdData cData)
extractText _ = failEmpty
+read_text_seq :: InlineMatcher
+read_text_seq = matchingElement NsText "sequence"
+ $ matchChildContent [] read_plain_text
+
-- specifically. I honor that, although the current implementation of '(<>)'
-- for 'Inlines' in "Text.Pandoc.Builder" will collaps them agein.
@@ -559,6 +595,8 @@ read_paragraph = matchingElement NsText "p"
, read_reference_start
, read_bookmark_ref
, read_reference_ref
+ , read_maybe_nested_img_frame
+ , read_text_seq
] read_plain_text
@@ -583,6 +621,7 @@ read_header = matchingElement NsText "h"
, read_reference_start
, read_bookmark_ref
, read_reference_ref
+ , read_maybe_nested_img_frame
] read_plain_text
) -< blocks
anchor <- getHeaderAnchor -< children
@@ -688,6 +727,68 @@ read_table_cell = matchingElement NsTable "table-cell"
]
----------------------
+-- Images
+----------------------
+
+--
+read_maybe_nested_img_frame :: InlineMatcher
+read_maybe_nested_img_frame = matchingElement NsDraw "frame"
+ $ proc blocks -> do
+ img <- (findChild' NsDraw "image") -< ()
+ case img of
+ Just _ -> read_frame -< blocks
+ Nothing -> matchChildContent' [ read_frame_text_box ] -< blocks
+
+read_frame :: OdtReaderSafe Inlines Inlines
+read_frame =
+ proc blocks -> do
+ w <- ( findAttr' NsSVG "width" ) -< ()
+ h <- ( findAttr' NsSVG "height" ) -< ()
+ titleNodes <- ( matchChildContent' [ read_frame_title ] ) -< blocks
+ src <- matchChildContent' [ read_image_src ] -< blocks
+ resource <- lookupResource -< src
+ _ <- updateMediaWithResource -< resource
+ alt <- (matchChildContent [] read_plain_text) -< blocks
+ arr (uncurry4 imageWith ) -<
+ (image_attributes w h, src, inlineListToIdentifier (toList titleNodes), alt)
+
+image_attributes :: Maybe String -> Maybe String -> Attr
+image_attributes x y =
+ ( "", [], (dim "width" x) ++ (dim "height" y))
+ where
+ dim _ (Just "") = []
+ dim name (Just v) = [(name, v)]
+ dim _ Nothing = []
+
+read_image_src :: (Namespace, ElementName, OdtReader Anchor Anchor)
+read_image_src = matchingElement NsDraw "image"
+ $ proc _ -> do
+ imgSrc <- findAttr NsXLink "href" -< ()
+ case imgSrc of
+ Right src -> returnV src -<< ()
+ Left _ -> returnV "" -< ()
+
+read_frame_title :: InlineMatcher
+read_frame_title = matchingElement NsSVG "title"
+ $ (matchChildContent [] read_plain_text)
+
+read_frame_text_box :: InlineMatcher
+read_frame_text_box = matchingElement NsDraw "text-box"
+ $ proc blocks -> do
+ paragraphs <- (matchChildContent' [ read_paragraph ]) -< blocks
+ arr read_img_with_caption -< toList paragraphs
+
+read_img_with_caption :: [Block] -> Inlines
+read_img_with_caption ((Para ((Image attr alt (src,title)) : [])) : _) =
+ singleton (Image attr alt (src, 'f':'i':'g':':':title)) -- no text, default caption
+read_img_with_caption ((Para ((Image attr _ (src,title)) : txt)) : _) =
+ singleton (Image attr txt (src, 'f':'i':'g':':':title) ) -- override caption with the text that follows
+read_img_with_caption ( (Para (_ : xs)) : ys) =
+ read_img_with_caption ((Para xs) : ys)
+read_img_with_caption _ =
+ mempty
+
+----------------------
-- Internal links
----------------------
@@ -713,9 +814,8 @@ maybeAddAnchorFrom anchorReader =
>>>
proc (inlines, fAnchorElem) -> do
case fAnchorElem of
- Right anchorElem ->
- arr (anchorElem <>) -<< inlines
- Left _ -> returnA -< inlines
+ Right anchorElem -> returnA -< anchorElem
+ Left _ -> returnA -< inlines
where
toAnchorElem :: Anchor -> Inlines
toAnchorElem anchorID = spanWith (anchorID, [], []) mempty
@@ -783,8 +883,11 @@ read_text = matchChildContent' [ read_header
]
>>^ doc
-read_body :: OdtReader _x Pandoc
+read_body :: OdtReader _x (Pandoc, MediaBag)
read_body = executeIn NsOffice "body"
$ executeIn NsOffice "text"
- $ liftAsSuccess read_text
-
+ $ liftAsSuccess
+ $ proc inlines -> do
+ txt <- read_text -< inlines
+ state <- getExtraState -< ()
+ returnA -< (txt, getMediaBag state)
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 96cfed0b3..26ba6df82 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -76,8 +76,9 @@ import Control.Applicative hiding ( liftA, liftA2, liftA3 )
import qualified Data.Foldable as F
import qualified Data.Map as M
import qualified Data.Set as S
-import Data.List ( unfoldr )
+import Data.Char ( isDigit )
import Data.Default
+import Data.List ( unfoldr )
import Data.Maybe
import qualified Text.XML.Light as XML
@@ -390,6 +391,7 @@ data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType
, listItemPrefix :: Maybe String
, listItemSuffix :: Maybe String
, listItemFormat :: ListItemNumberFormat
+ , listItemStart :: Int
}
deriving ( Eq, Ord )
@@ -578,25 +580,31 @@ readListLevelStyles namespace elementName levelType =
readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle)
readListLevelStyle levelType = readAttr NsText "level"
>>?! keepingTheValue
- ( liftA4 toListLevelStyle
- ( returnV levelType )
- ( findAttr' NsStyle "num-prefix" )
- ( findAttr' NsStyle "num-suffix" )
- ( getAttr NsStyle "num-format" )
+ ( liftA5 toListLevelStyle
+ ( returnV levelType )
+ ( findAttr' NsStyle "num-prefix" )
+ ( findAttr' NsStyle "num-suffix" )
+ ( getAttr NsStyle "num-format" )
+ ( findAttr' NsText "start-value" )
)
where
- toListLevelStyle _ p s LinfNone = ListLevelStyle LltBullet p s LinfNone
- toListLevelStyle _ p s f@(LinfString _) = ListLevelStyle LltBullet p s f
- toListLevelStyle t p s f = ListLevelStyle t p s f
+ toListLevelStyle _ p s LinfNone b = ListLevelStyle LltBullet p s LinfNone (startValue b)
+ toListLevelStyle _ p s f@(LinfString _) b = ListLevelStyle LltBullet p s f (startValue b)
+ toListLevelStyle t p s f b = ListLevelStyle t p s f (startValue b)
+ startValue (Just "") = 1
+ startValue (Just v) = if all isDigit v
+ then read v
+ else 1
+ startValue Nothing = 1
--
chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle
chooseMostSpecificListLevelStyle ls | ls == mempty = Nothing
| otherwise = Just ( F.foldr1 select ls )
where
- select ( ListLevelStyle t1 p1 s1 f1 )
- ( ListLevelStyle t2 p2 s2 f2 )
- = ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2)
+ select ( ListLevelStyle t1 p1 s1 f1 b1 )
+ ( ListLevelStyle t2 p2 s2 f2 _ )
+ = ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2) b1
select' LltNumbered _ = LltNumbered
select' _ LltNumbered = LltNumbered
select' _ _ = LltBullet