aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Docx.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Docx.hs')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs253
1 files changed, 34 insertions, 219 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 367e26bd0..6dc3f11c2 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -84,15 +84,16 @@ import Text.Pandoc.Readers.Docx.Lists
import Text.Pandoc.Readers.Docx.Reducible
import Text.Pandoc.Shared
import Text.Pandoc.MediaBag (insertMedia, MediaBag)
-import Data.Maybe (mapMaybe, fromMaybe)
-import Data.List (delete, stripPrefix, (\\), intersperse, intersect)
+import Data.Maybe (mapMaybe)
+import Data.List (delete, stripPrefix, (\\), intersect)
import Data.Monoid
import Text.TeXMath (writeTeX)
-import qualified Text.TeXMath.Types as TM
+import Data.Default (Default)
import qualified Data.ByteString.Lazy as B
import qualified Data.Map as M
import Control.Monad.Reader
import Control.Monad.State
+import Control.Applicative ((<$>))
readDocx :: ReaderOptions
-> B.ByteString
@@ -104,25 +105,19 @@ readDocx opts bytes =
Left _ -> error $ "couldn't parse docx file"
data DState = DState { docxAnchorMap :: M.Map String String
- , docxMediaBag :: MediaBag
- , docxInHeaderBlock :: Bool}
+ , docxMediaBag :: MediaBag }
-defaultDState :: DState
-defaultDState = DState { docxAnchorMap = M.empty
- , docxMediaBag = mempty
- , docxInHeaderBlock = False}
+instance Default DState where
+ def = DState { docxAnchorMap = M.empty
+ , docxMediaBag = mempty }
-data DEnv = DEnv { docxOptions :: ReaderOptions}
+data DEnv = DEnv { docxOptions :: ReaderOptions
+ , docxInHeaderBlock :: Bool }
-type DocxContext = ReaderT DEnv (State DState)
+instance Default DEnv where
+ def = DEnv def False
-withDState :: (DState -> DState) -> DocxContext a -> DocxContext a
-withDState f dctx = do
- ds <- get
- modify f
- ctx' <- dctx
- put ds
- return ctx'
+type DocxContext = ReaderT DEnv (State DState)
evalDocxContext :: DocxContext a -> DEnv -> DState -> a
evalDocxContext ctx env st = evalState (runReaderT ctx env) st
@@ -161,7 +156,7 @@ isEmptyPar (Paragraph _ parParts) =
isEmptyElem (TextRun s) = trim s == ""
isEmptyElem _ = True
isEmptyPar _ = False
-
+
bodyPartsToMeta' :: [BodyPart] -> DocxContext (M.Map String MetaValue)
bodyPartsToMeta' [] = return M.empty
bodyPartsToMeta' (bp : bps)
@@ -170,7 +165,7 @@ bodyPartsToMeta' (bp : bps)
, (Just metaField) <- M.lookup c metaStyles = do
inlines <- parPartsToInlines parParts
remaining <- bodyPartsToMeta' bps
- let
+ let
f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils']
f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks ((Para ils) : blks)
f m (MetaList mv) = MetaList (m : mv)
@@ -321,6 +316,13 @@ runToInlines (Footnote bps) =
concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks])
runToInlines (Endnote bps) =
concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks])
+runToInlines (InlineDrawing fp bs) = do
+ mediaBag <- gets docxMediaBag
+ modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag }
+ return [Image [] (fp, "")]
+
+
+
parPartToInlines :: ParPart -> DocxContext [Inline]
parPartToInlines (PlainRun r) = runToInlines r
@@ -350,7 +352,7 @@ parPartToInlines (BookMark _ anchor) =
-- user-defined anchor links with header auto ids.
do
-- get whether we're in a header.
- inHdrBool <- gets docxInHeaderBlock
+ inHdrBool <- asks docxInHeaderBlock
-- Get the anchor map.
anchorMap <- gets docxAnchorMap
-- We don't want to rewrite if we're in a header, since we'll take
@@ -365,7 +367,8 @@ parPartToInlines (BookMark _ anchor) =
if not inHdrBool && anchor `elem` (M.elems anchorMap)
then uniqueIdent [Str anchor] (M.elems anchorMap)
else anchor
- modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}
+ unless inHdrBool
+ (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
return [Span (newAnchor, ["anchor"], []) []]
parPartToInlines (Drawing fp bs) = do
mediaBag <- gets docxMediaBag
@@ -377,193 +380,8 @@ parPartToInlines (InternalHyperLink anchor runs) = do
parPartToInlines (ExternalHyperLink target runs) = do
ils <- concatMapM runToInlines runs
return [Link ils (target, "")]
-parPartToInlines (PlainOMath omath) = do
- e <- oMathToExps omath
- return [Math InlineMath (writeTeX e)]
-
-oMathToExps :: OMath -> DocxContext [TM.Exp]
-oMathToExps (OMath oMathElems) = concatMapM oMathElemToExps oMathElems
-
-oMathElemToExps :: OMathElem -> DocxContext [TM.Exp]
-oMathElemToExps (Accent style base) = do
- baseExp <- baseToExp base
- let chr = case accentChar style of
- Just c -> c
- Nothing -> '\180' -- default to acute.
- return [TM.EOver False baseExp (TM.ESymbol TM.Accent [chr])]
-oMathElemToExps(Bar style base) = do
- baseExp <- baseToExp base
- return $ case barPos style of
- Top -> [TM.EOver False baseExp (TM.ESymbol TM.Accent "\175")]
- Bottom -> [TM.EUnder False baseExp (TM.ESymbol TM.Accent "\818")]
-oMathElemToExps (Box base) =
- (\e -> return [e]) =<< baseToExp base
-oMathElemToExps (BorderBox base) =
- -- TODO: This should be "\\boxed" somehow
- (\e -> return [e]) =<< baseToExp base
-oMathElemToExps (Delimiter dPr bases) = do
- baseExps <- mapM baseToExp bases
- let inDelimExps = map Right baseExps
- beg = fromMaybe '(' (delimBegChar dPr)
- end = fromMaybe ')' (delimEndChar dPr)
- sep = fromMaybe '|' (delimSepChar dPr)
- exps = intersperse (Left [sep]) inDelimExps
- return [TM.EDelimited [beg] [end] exps]
-oMathElemToExps (EquationArray bases) = do
- let f b = do bs <- baseToExp' b
- return [bs]
- baseExps <- mapM f bases
- return [TM.EArray [] baseExps]
-oMathElemToExps (Fraction num denom) = do
- numExp <- concatMapM oMathElemToExps num >>= (return . TM.EGrouped)
- denExp <- concatMapM oMathElemToExps denom >>= (return . TM.EGrouped)
- return [TM.EFraction TM.NormalFrac numExp denExp]
-oMathElemToExps (Function fname base) = do
- -- We need a string for the fname, but omml gives it to us as a
- -- series of oMath elems. We're going to filter out the oMathRuns,
- -- which should work for us most of the time.
- let f :: OMathElem -> String
- f (OMathRun _ run) = runToString run
- f _ = ""
- fnameString = concatMap f fname
- baseExp <- baseToExp base
- return [TM.EMathOperator fnameString, baseExp]
-oMathElemToExps (Group style base)
- | Just Top <- groupPos style = do
- baseExp <- baseToExp base
- let chr = case groupChr style of
- Just c -> c
- Nothing -> '\65079' -- default to overbrace
- return [TM.EOver False baseExp (TM.ESymbol TM.Accent [chr])]
- | otherwise = do
- baseExp <- baseToExp base
- let chr = case groupChr style of
- Just c -> c
- Nothing -> '\65080' -- default to underbrace
- return [TM.EUnder False baseExp (TM.ESymbol TM.Accent [chr])]
-oMathElemToExps (LowerLimit base limElems) = do
- baseExp <- baseToExp base
- lim <- concatMapM oMathElemToExps limElems >>= (return . TM.EGrouped)
- return [TM.EUnder True lim baseExp]
-oMathElemToExps (UpperLimit base limElems) = do
- baseExp <- baseToExp base
- lim <- concatMapM oMathElemToExps limElems >>= (return . TM.EGrouped)
- return [TM.EOver True lim baseExp]
-oMathElemToExps (Matrix bases) = do
- rows <- mapM (mapM (\b -> baseToExp' b)) bases
- return [TM.EArray [TM.AlignCenter] rows]
-oMathElemToExps (NAry style sub sup base) = do
- subExps <- concatMapM oMathElemToExps sub
- supExps <- concatMapM oMathElemToExps sup
- baseExp <- baseToExp base
- let opChar = case nAryChar style of
- Just c -> c
- -- default to integral
- Nothing -> '\8747'
- return [ TM.ESubsup
- (TM.ESymbol TM.Op [opChar])
- (TM.EGrouped subExps)
- (TM.EGrouped supExps)
- , baseExp]
-oMathElemToExps (Phantom base) =
- (\e -> return [TM.EPhantom e]) =<< baseToExp base
-oMathElemToExps (Radical degree base) = do
- degExps <- concatMapM oMathElemToExps degree
- baseExp <- baseToExp base
- return $ case degExps of
- [] -> [TM.ESqrt baseExp]
- ds -> [TM.ERoot (TM.EGrouped ds) baseExp]
-oMathElemToExps (PreSubSuper sub sup base) = do
- subExps <- concatMapM oMathElemToExps sub
- supExps <- concatMapM oMathElemToExps sup
- baseExp <- baseToExp base
- return [ TM.ESubsup
- (TM.EIdentifier "") (TM.EGrouped subExps) (TM.EGrouped supExps)
- , baseExp]
-oMathElemToExps (Sub base sub) = do
- baseExp <- baseToExp base
- subExps <- concatMapM oMathElemToExps sub
- return [TM.ESub baseExp (TM.EGrouped subExps)]
-oMathElemToExps (SubSuper base sub sup) = do
- baseExp <- baseToExp base
- subExps <- concatMapM oMathElemToExps sub
- supExps <- concatMapM oMathElemToExps sup
- return [TM.ESubsup baseExp (TM.EGrouped subExps) (TM.EGrouped supExps)]
-oMathElemToExps (Super base sup) = do
- baseExp <- baseToExp base
- supExps <- concatMapM oMathElemToExps sup
- return [TM.ESuper baseExp (TM.EGrouped supExps)]
-oMathElemToExps (OMathRun sty run@(Run _ _))
- | NoStyle <- oMathRunTextStyle sty =
- return $ [TM.EIdentifier $ runToString run]
- | Nothing <- oMathRunStyleToTextType sty =
- return $ [TM.EIdentifier $ runToString run]
- | Just textType <- oMathRunStyleToTextType sty =
- return $ if oMathLit sty
- then [TM.EText textType (runToString run)]
- else [TM.EStyled textType [TM.EIdentifier $ runToString run]]
-oMathElemToExps (OMathRun _ _) = return []
-
-oMathRunStyleToTextType :: OMathRunStyle -> Maybe TM.TextType
-oMathRunStyleToTextType mrPr
- | Normal <- oMathRunTextStyle mrPr =
- Just $ TM.TextNormal
- | Styled scr sty <- oMathRunTextStyle mrPr
- ,Just OBold <- sty
- , Just OSansSerif <- scr =
- Just $ TM.TextSansSerifBold
- | Styled scr sty <- oMathRunTextStyle mrPr
- , Just OBoldItalic <- sty
- , Just OSansSerif <- scr =
- Just $ TM.TextSansSerifBoldItalic
- | Styled scr sty <- oMathRunTextStyle mrPr
- , Just OBold <- sty
- , Just OScript <- scr =
- Just $ TM.TextBoldScript
- | Styled scr sty <- oMathRunTextStyle mrPr
- , Just OBold <- sty
- , Just OFraktur <- scr =
- Just $ TM.TextBoldFraktur
- | Styled scr sty <- oMathRunTextStyle mrPr
- , Just OItalic <- sty
- , Just OSansSerif <- scr =
- Just $ TM.TextSansSerifItalic
- | Styled _ sty <- oMathRunTextStyle mrPr
- , Just OBold <- sty =
- Just $ TM.TextBold
- | Styled _ sty <- oMathRunTextStyle mrPr
- , Just OItalic <- sty =
- Just $ TM.TextItalic
- | Styled scr _ <- oMathRunTextStyle mrPr
- , Just OMonospace <- scr =
- Just $ TM.TextMonospace
- | Styled scr _ <- oMathRunTextStyle mrPr
- , Just OSansSerif <- scr =
- Just $ TM.TextSansSerif
- | Styled scr _ <- oMathRunTextStyle mrPr
- , Just ODoubleStruck <- scr =
- Just $ TM.TextDoubleStruck
- | Styled scr _ <- oMathRunTextStyle mrPr
- , Just OScript <- scr =
- Just $ TM.TextDoubleStruck
- | Styled scr _ <- oMathRunTextStyle mrPr
- , Just OFraktur <- scr =
- Just $ TM.TextFraktur
- | Styled _ sty <- oMathRunTextStyle mrPr
- , Just OBoldItalic <- sty =
- Just $ TM.TextBoldItalic
- | otherwise = Nothing
-
-
-
-baseToExp :: Base -> DocxContext TM.Exp
-baseToExp (Base mathElems) =
- concatMapM oMathElemToExps mathElems >>= (return . TM.EGrouped)
-
--- an ungrouped version of baseToExp
-baseToExp' :: Base -> DocxContext [TM.Exp]
-baseToExp' (Base mathElems) =
- concatMapM oMathElemToExps mathElems
+parPartToInlines (PlainOMath exps) = do
+ return [Math InlineMath (writeTeX exps)]
isAnchorSpan :: Inline -> Bool
@@ -631,8 +449,8 @@ bodyPartToBlocks (Paragraph pPr parparts)
[CodeBlock ("", [], []) (concatMap parPartToString parparts)]
bodyPartToBlocks (Paragraph pPr parparts)
| any isHeaderContainer (parStyleToContainers pPr) = do
- ils <-withDState (\s -> s{docxInHeaderBlock = True}) $
- parPartsToInlines parparts >>= (return . normalizeSpaces)
+ ils <- normalizeSpaces <$> local (\s -> s{docxInHeaderBlock = True})
+ (parPartsToInlines parparts)
let (Container hdrFun) = head $ filter isHeaderContainer (parStyleToContainers pPr)
Header n attr _ = hdrFun []
hdr <- makeHeaderAnchor $ Header n attr ils
@@ -689,11 +507,10 @@ bodyPartToBlocks (Tbl cap _ look (r:rs)) = do
widths = replicate size 0 :: [Double]
return [Table caption alignments widths hdrCells cells]
-bodyPartToBlocks (OMathPara _ maths) = do
- omaths <- mapM oMathToExps maths
+bodyPartToBlocks (OMathPara exps) = do
return [Para $
- map (\m -> Math DisplayMath (writeTeX m))
- omaths]
+ map (\e -> Math DisplayMath (writeTeX e))
+ exps]
-- replace targets with generated anchors.
rewriteLink :: Inline -> DocxContext Inline
@@ -717,10 +534,8 @@ bodyToOutput (Body bps) = do
docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag)
docxToOutput opts (Docx (Document _ body)) =
- let dState = defaultDState
- dEnv = DEnv { docxOptions = opts }
- in
- evalDocxContext (bodyToOutput body) dEnv dState
+ let dEnv = def { docxOptions = opts} in
+ evalDocxContext (bodyToOutput body) dEnv def
ilToCode :: Inline -> String