diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 253 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/OMath.hs | 622 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 391 |
3 files changed, 678 insertions, 588 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 367e26bd0..6dc3f11c2 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -84,15 +84,16 @@ import Text.Pandoc.Readers.Docx.Lists import Text.Pandoc.Readers.Docx.Reducible import Text.Pandoc.Shared import Text.Pandoc.MediaBag (insertMedia, MediaBag) -import Data.Maybe (mapMaybe, fromMaybe) -import Data.List (delete, stripPrefix, (\\), intersperse, intersect) +import Data.Maybe (mapMaybe) +import Data.List (delete, stripPrefix, (\\), intersect) import Data.Monoid import Text.TeXMath (writeTeX) -import qualified Text.TeXMath.Types as TM +import Data.Default (Default) import qualified Data.ByteString.Lazy as B import qualified Data.Map as M import Control.Monad.Reader import Control.Monad.State +import Control.Applicative ((<$>)) readDocx :: ReaderOptions -> B.ByteString @@ -104,25 +105,19 @@ readDocx opts bytes = Left _ -> error $ "couldn't parse docx file" data DState = DState { docxAnchorMap :: M.Map String String - , docxMediaBag :: MediaBag - , docxInHeaderBlock :: Bool} + , docxMediaBag :: MediaBag } -defaultDState :: DState -defaultDState = DState { docxAnchorMap = M.empty - , docxMediaBag = mempty - , docxInHeaderBlock = False} +instance Default DState where + def = DState { docxAnchorMap = M.empty + , docxMediaBag = mempty } -data DEnv = DEnv { docxOptions :: ReaderOptions} +data DEnv = DEnv { docxOptions :: ReaderOptions + , docxInHeaderBlock :: Bool } -type DocxContext = ReaderT DEnv (State DState) +instance Default DEnv where + def = DEnv def False -withDState :: (DState -> DState) -> DocxContext a -> DocxContext a -withDState f dctx = do - ds <- get - modify f - ctx' <- dctx - put ds - return ctx' +type DocxContext = ReaderT DEnv (State DState) evalDocxContext :: DocxContext a -> DEnv -> DState -> a evalDocxContext ctx env st = evalState (runReaderT ctx env) st @@ -161,7 +156,7 @@ isEmptyPar (Paragraph _ parParts) = isEmptyElem (TextRun s) = trim s == "" isEmptyElem _ = True isEmptyPar _ = False - + bodyPartsToMeta' :: [BodyPart] -> DocxContext (M.Map String MetaValue) bodyPartsToMeta' [] = return M.empty bodyPartsToMeta' (bp : bps) @@ -170,7 +165,7 @@ bodyPartsToMeta' (bp : bps) , (Just metaField) <- M.lookup c metaStyles = do inlines <- parPartsToInlines parParts remaining <- bodyPartsToMeta' bps - let + let f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils'] f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks ((Para ils) : blks) f m (MetaList mv) = MetaList (m : mv) @@ -321,6 +316,13 @@ runToInlines (Footnote bps) = concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks]) runToInlines (Endnote bps) = concatMapM bodyPartToBlocks bps >>= (\blks -> return [Note blks]) +runToInlines (InlineDrawing fp bs) = do + mediaBag <- gets docxMediaBag + modify $ \s -> s { docxMediaBag = insertMedia fp Nothing bs mediaBag } + return [Image [] (fp, "")] + + + parPartToInlines :: ParPart -> DocxContext [Inline] parPartToInlines (PlainRun r) = runToInlines r @@ -350,7 +352,7 @@ parPartToInlines (BookMark _ anchor) = -- user-defined anchor links with header auto ids. do -- get whether we're in a header. - inHdrBool <- gets docxInHeaderBlock + inHdrBool <- asks docxInHeaderBlock -- Get the anchor map. anchorMap <- gets docxAnchorMap -- We don't want to rewrite if we're in a header, since we'll take @@ -365,7 +367,8 @@ parPartToInlines (BookMark _ anchor) = if not inHdrBool && anchor `elem` (M.elems anchorMap) then uniqueIdent [Str anchor] (M.elems anchorMap) else anchor - modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap} + unless inHdrBool + (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}) return [Span (newAnchor, ["anchor"], []) []] parPartToInlines (Drawing fp bs) = do mediaBag <- gets docxMediaBag @@ -377,193 +380,8 @@ parPartToInlines (InternalHyperLink anchor runs) = do parPartToInlines (ExternalHyperLink target runs) = do ils <- concatMapM runToInlines runs return [Link ils (target, "")] -parPartToInlines (PlainOMath omath) = do - e <- oMathToExps omath - return [Math InlineMath (writeTeX e)] - -oMathToExps :: OMath -> DocxContext [TM.Exp] -oMathToExps (OMath oMathElems) = concatMapM oMathElemToExps oMathElems - -oMathElemToExps :: OMathElem -> DocxContext [TM.Exp] -oMathElemToExps (Accent style base) = do - baseExp <- baseToExp base - let chr = case accentChar style of - Just c -> c - Nothing -> '\180' -- default to acute. - return [TM.EOver False baseExp (TM.ESymbol TM.Accent [chr])] -oMathElemToExps(Bar style base) = do - baseExp <- baseToExp base - return $ case barPos style of - Top -> [TM.EOver False baseExp (TM.ESymbol TM.Accent "\175")] - Bottom -> [TM.EUnder False baseExp (TM.ESymbol TM.Accent "\818")] -oMathElemToExps (Box base) = - (\e -> return [e]) =<< baseToExp base -oMathElemToExps (BorderBox base) = - -- TODO: This should be "\\boxed" somehow - (\e -> return [e]) =<< baseToExp base -oMathElemToExps (Delimiter dPr bases) = do - baseExps <- mapM baseToExp bases - let inDelimExps = map Right baseExps - beg = fromMaybe '(' (delimBegChar dPr) - end = fromMaybe ')' (delimEndChar dPr) - sep = fromMaybe '|' (delimSepChar dPr) - exps = intersperse (Left [sep]) inDelimExps - return [TM.EDelimited [beg] [end] exps] -oMathElemToExps (EquationArray bases) = do - let f b = do bs <- baseToExp' b - return [bs] - baseExps <- mapM f bases - return [TM.EArray [] baseExps] -oMathElemToExps (Fraction num denom) = do - numExp <- concatMapM oMathElemToExps num >>= (return . TM.EGrouped) - denExp <- concatMapM oMathElemToExps denom >>= (return . TM.EGrouped) - return [TM.EFraction TM.NormalFrac numExp denExp] -oMathElemToExps (Function fname base) = do - -- We need a string for the fname, but omml gives it to us as a - -- series of oMath elems. We're going to filter out the oMathRuns, - -- which should work for us most of the time. - let f :: OMathElem -> String - f (OMathRun _ run) = runToString run - f _ = "" - fnameString = concatMap f fname - baseExp <- baseToExp base - return [TM.EMathOperator fnameString, baseExp] -oMathElemToExps (Group style base) - | Just Top <- groupPos style = do - baseExp <- baseToExp base - let chr = case groupChr style of - Just c -> c - Nothing -> '\65079' -- default to overbrace - return [TM.EOver False baseExp (TM.ESymbol TM.Accent [chr])] - | otherwise = do - baseExp <- baseToExp base - let chr = case groupChr style of - Just c -> c - Nothing -> '\65080' -- default to underbrace - return [TM.EUnder False baseExp (TM.ESymbol TM.Accent [chr])] -oMathElemToExps (LowerLimit base limElems) = do - baseExp <- baseToExp base - lim <- concatMapM oMathElemToExps limElems >>= (return . TM.EGrouped) - return [TM.EUnder True lim baseExp] -oMathElemToExps (UpperLimit base limElems) = do - baseExp <- baseToExp base - lim <- concatMapM oMathElemToExps limElems >>= (return . TM.EGrouped) - return [TM.EOver True lim baseExp] -oMathElemToExps (Matrix bases) = do - rows <- mapM (mapM (\b -> baseToExp' b)) bases - return [TM.EArray [TM.AlignCenter] rows] -oMathElemToExps (NAry style sub sup base) = do - subExps <- concatMapM oMathElemToExps sub - supExps <- concatMapM oMathElemToExps sup - baseExp <- baseToExp base - let opChar = case nAryChar style of - Just c -> c - -- default to integral - Nothing -> '\8747' - return [ TM.ESubsup - (TM.ESymbol TM.Op [opChar]) - (TM.EGrouped subExps) - (TM.EGrouped supExps) - , baseExp] -oMathElemToExps (Phantom base) = - (\e -> return [TM.EPhantom e]) =<< baseToExp base -oMathElemToExps (Radical degree base) = do - degExps <- concatMapM oMathElemToExps degree - baseExp <- baseToExp base - return $ case degExps of - [] -> [TM.ESqrt baseExp] - ds -> [TM.ERoot (TM.EGrouped ds) baseExp] -oMathElemToExps (PreSubSuper sub sup base) = do - subExps <- concatMapM oMathElemToExps sub - supExps <- concatMapM oMathElemToExps sup - baseExp <- baseToExp base - return [ TM.ESubsup - (TM.EIdentifier "") (TM.EGrouped subExps) (TM.EGrouped supExps) - , baseExp] -oMathElemToExps (Sub base sub) = do - baseExp <- baseToExp base - subExps <- concatMapM oMathElemToExps sub - return [TM.ESub baseExp (TM.EGrouped subExps)] -oMathElemToExps (SubSuper base sub sup) = do - baseExp <- baseToExp base - subExps <- concatMapM oMathElemToExps sub - supExps <- concatMapM oMathElemToExps sup - return [TM.ESubsup baseExp (TM.EGrouped subExps) (TM.EGrouped supExps)] -oMathElemToExps (Super base sup) = do - baseExp <- baseToExp base - supExps <- concatMapM oMathElemToExps sup - return [TM.ESuper baseExp (TM.EGrouped supExps)] -oMathElemToExps (OMathRun sty run@(Run _ _)) - | NoStyle <- oMathRunTextStyle sty = - return $ [TM.EIdentifier $ runToString run] - | Nothing <- oMathRunStyleToTextType sty = - return $ [TM.EIdentifier $ runToString run] - | Just textType <- oMathRunStyleToTextType sty = - return $ if oMathLit sty - then [TM.EText textType (runToString run)] - else [TM.EStyled textType [TM.EIdentifier $ runToString run]] -oMathElemToExps (OMathRun _ _) = return [] - -oMathRunStyleToTextType :: OMathRunStyle -> Maybe TM.TextType -oMathRunStyleToTextType mrPr - | Normal <- oMathRunTextStyle mrPr = - Just $ TM.TextNormal - | Styled scr sty <- oMathRunTextStyle mrPr - ,Just OBold <- sty - , Just OSansSerif <- scr = - Just $ TM.TextSansSerifBold - | Styled scr sty <- oMathRunTextStyle mrPr - , Just OBoldItalic <- sty - , Just OSansSerif <- scr = - Just $ TM.TextSansSerifBoldItalic - | Styled scr sty <- oMathRunTextStyle mrPr - , Just OBold <- sty - , Just OScript <- scr = - Just $ TM.TextBoldScript - | Styled scr sty <- oMathRunTextStyle mrPr - , Just OBold <- sty - , Just OFraktur <- scr = - Just $ TM.TextBoldFraktur - | Styled scr sty <- oMathRunTextStyle mrPr - , Just OItalic <- sty - , Just OSansSerif <- scr = - Just $ TM.TextSansSerifItalic - | Styled _ sty <- oMathRunTextStyle mrPr - , Just OBold <- sty = - Just $ TM.TextBold - | Styled _ sty <- oMathRunTextStyle mrPr - , Just OItalic <- sty = - Just $ TM.TextItalic - | Styled scr _ <- oMathRunTextStyle mrPr - , Just OMonospace <- scr = - Just $ TM.TextMonospace - | Styled scr _ <- oMathRunTextStyle mrPr - , Just OSansSerif <- scr = - Just $ TM.TextSansSerif - | Styled scr _ <- oMathRunTextStyle mrPr - , Just ODoubleStruck <- scr = - Just $ TM.TextDoubleStruck - | Styled scr _ <- oMathRunTextStyle mrPr - , Just OScript <- scr = - Just $ TM.TextDoubleStruck - | Styled scr _ <- oMathRunTextStyle mrPr - , Just OFraktur <- scr = - Just $ TM.TextFraktur - | Styled _ sty <- oMathRunTextStyle mrPr - , Just OBoldItalic <- sty = - Just $ TM.TextBoldItalic - | otherwise = Nothing - - - -baseToExp :: Base -> DocxContext TM.Exp -baseToExp (Base mathElems) = - concatMapM oMathElemToExps mathElems >>= (return . TM.EGrouped) - --- an ungrouped version of baseToExp -baseToExp' :: Base -> DocxContext [TM.Exp] -baseToExp' (Base mathElems) = - concatMapM oMathElemToExps mathElems +parPartToInlines (PlainOMath exps) = do + return [Math InlineMath (writeTeX exps)] isAnchorSpan :: Inline -> Bool @@ -631,8 +449,8 @@ bodyPartToBlocks (Paragraph pPr parparts) [CodeBlock ("", [], []) (concatMap parPartToString parparts)] bodyPartToBlocks (Paragraph pPr parparts) | any isHeaderContainer (parStyleToContainers pPr) = do - ils <-withDState (\s -> s{docxInHeaderBlock = True}) $ - parPartsToInlines parparts >>= (return . normalizeSpaces) + ils <- normalizeSpaces <$> local (\s -> s{docxInHeaderBlock = True}) + (parPartsToInlines parparts) let (Container hdrFun) = head $ filter isHeaderContainer (parStyleToContainers pPr) Header n attr _ = hdrFun [] hdr <- makeHeaderAnchor $ Header n attr ils @@ -689,11 +507,10 @@ bodyPartToBlocks (Tbl cap _ look (r:rs)) = do widths = replicate size 0 :: [Double] return [Table caption alignments widths hdrCells cells] -bodyPartToBlocks (OMathPara _ maths) = do - omaths <- mapM oMathToExps maths +bodyPartToBlocks (OMathPara exps) = do return [Para $ - map (\m -> Math DisplayMath (writeTeX m)) - omaths] + map (\e -> Math DisplayMath (writeTeX e)) + exps] -- replace targets with generated anchors. rewriteLink :: Inline -> DocxContext Inline @@ -717,10 +534,8 @@ bodyToOutput (Body bps) = do docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag) docxToOutput opts (Docx (Document _ body)) = - let dState = defaultDState - dEnv = DEnv { docxOptions = opts } - in - evalDocxContext (bodyToOutput body) dEnv dState + let dEnv = def { docxOptions = opts} in + evalDocxContext (bodyToOutput body) dEnv def ilToCode :: Inline -> String diff --git a/src/Text/Pandoc/Readers/Docx/OMath.hs b/src/Text/Pandoc/Readers/Docx/OMath.hs new file mode 100644 index 000000000..309aaefe8 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/OMath.hs @@ -0,0 +1,622 @@ +{-# LANGUAGE PatternGuards #-} + +{- +Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Docx.Math + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal <jrosenthal@jhu.edu> + Stability : alpha + Portability : portable + +Types and functions for conversion of OMML into TeXMath. +-} + +module Text.Pandoc.Readers.Docx.OMath ( elemToExps + ) where + +import Text.XML.Light +import Data.Maybe (mapMaybe, fromMaybe) +import Data.List (intersperse) +import qualified Text.TeXMath.Types as TM +import Control.Applicative ((<$>)) + +type NameSpaces = [(String, String)] + +elemName :: NameSpaces -> String -> String -> QName +elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix)) + +isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem ns prefix name element = + qName (elName element) == name && + qURI (elName element) == (lookup prefix ns) + + +data OMath = OMath [OMathElem] + deriving Show + +data OMathElem = Accent AccentStyle Base + | Bar BarStyle Base + | Box Base + | BorderBox Base + | Delimiter DelimStyle [Base] + | EquationArray [Base] + | Fraction [OMathElem] [OMathElem] + | Function [OMathElem] Base + | Group GroupStyle Base + | LowerLimit Base [OMathElem] + | UpperLimit Base [OMathElem] + | Matrix [[Base]] + | NAry NAryStyle [OMathElem] [OMathElem] Base + | Phantom Base + | Radical [OMathElem] Base + | PreSubSuper [OMathElem] [OMathElem] Base + | Sub Base [OMathElem] + | SubSuper Base [OMathElem] [OMathElem] + | Super Base [OMathElem] + | OMathRun OMathRunStyle [OMathRunElem] + deriving Show + +data OMathRunElem = TextRun String + | LnBrk + | Tab + deriving Show + +data Base = Base [OMathElem] + deriving Show + +data TopBottom = Top | Bottom + deriving Show + +data AccentStyle = AccentStyle { accentChar :: Maybe Char } + deriving Show + +data BarStyle = BarStyle { barPos :: TopBottom} + deriving Show + +data NAryStyle = NAryStyle { nAryChar :: Maybe Char + , nAryLimLoc :: LimLoc} + deriving Show + +data OMathRunStyle = OMathRunStyle { oMathLit :: Bool + , oMathRunTextStyle :: OMathRunTextStyle } + deriving Show + +data OMathRunTextStyle = NoStyle + | Normal + | Styled { oMathScript :: Maybe OMathTextScript + , oMathStyle :: Maybe OMathTextStyle } + deriving Show + +data OMathTextScript = ORoman + | OScript + | OFraktur + | ODoubleStruck + | OSansSerif + | OMonospace + deriving (Show, Eq) + +data OMathTextStyle = OPlain + | OBold + | OItalic + | OBoldItalic + deriving (Show, Eq) + +defaultNAryStyle :: NAryStyle +defaultNAryStyle = NAryStyle { nAryChar = Nothing -- integral, in practice + , nAryLimLoc = SubSup } + +data LimLoc = SubSup | UnderOver deriving Show + +data DelimStyle = DelimStyle { delimBegChar :: Maybe Char + , delimSepChar :: Maybe Char + , delimEndChar :: Maybe Char} + deriving Show + +defaultDelimStyle :: DelimStyle +defaultDelimStyle = DelimStyle { delimBegChar = Nothing + , delimSepChar = Nothing + , delimEndChar = Nothing } + +data GroupStyle = GroupStyle { groupChr :: Maybe Char + , groupPos :: Maybe TopBottom } + deriving Show + +defaultGroupStyle :: GroupStyle +defaultGroupStyle = GroupStyle {groupChr = Nothing, groupPos = Nothing} + +elemToMath :: NameSpaces -> Element -> Maybe OMath +elemToMath ns element | isElem ns "m" "oMath" element = + Just $ OMath $ mapMaybe (elemToMathElem ns) (elChildren element) +elemToMath _ _ = Nothing + +elemToBase :: NameSpaces -> Element -> Maybe Base +elemToBase ns element | isElem ns "m" "e" element = + Just $ Base $ mapMaybe (elemToMathElem ns) (elChildren element) +elemToBase _ _ = Nothing + +-- TODO: The right way to do this is to use the ampersand to break the +-- text lines into multiple columns. That's tricky, though, and this +-- will get us most of the way for the time being. +filterAmpersand :: OMathElem -> OMathElem +filterAmpersand (OMathRun mrPr elems) = + let f (TextRun s) = TextRun $ filter ('&' /=) s + f re = re + in + OMathRun mrPr (map f elems) +filterAmpersand e = e + +elemToBaseNoAmpersand :: NameSpaces -> Element -> Maybe Base +elemToBaseNoAmpersand ns element | isElem ns "m" "e" element = + return $ Base $ + mapMaybe + (\e -> (elemToMathElem ns e >>= (return . filterAmpersand))) + (elChildren element) +elemToBaseNoAmpersand _ _ = Nothing + +elemToOMathRunStyle :: NameSpaces -> Element -> OMathRunStyle +elemToOMathRunStyle ns element = + let lit = + case + findChild (elemName ns "m" "lit") element >>= + findAttr (elemName ns "m" "val") + of + Just "on" -> True + _ -> False + in + OMathRunStyle { oMathLit = lit + , oMathRunTextStyle = (elemToOMathRunTextStyle ns element) + } + +elemToOMathRunTextStyle :: NameSpaces -> Element -> OMathRunTextStyle +elemToOMathRunTextStyle ns element + | Just mrPr <- findChild (elemName ns "m" "rPr") element + , Just _ <- findChild (elemName ns "m" "nor") mrPr = + Normal + | Just mrPr <- findChild (elemName ns "m" "rPr") element = + let scr = + case + findChild (elemName ns "m" "scr") mrPr >>= + findAttr (elemName ns "m" "val") + of + Just "roman" -> Just ORoman + Just "script" -> Just OScript + Just "fraktur" -> Just OFraktur + Just "double-struck" -> Just ODoubleStruck + Just "sans-serif" -> Just OSansSerif + Just "monospace" -> Just OMonospace + _ -> Nothing + + sty = + case + findChild (elemName ns "m" "sty") mrPr >>= + findAttr (elemName ns "m" "val") + of + Just "p" -> Just OPlain + Just "b" -> Just OBold + Just "i" -> Just OItalic + Just "bi" -> Just OBoldItalic + _ -> Nothing + in + Styled { oMathScript = scr, oMathStyle = sty } + | otherwise = NoStyle + + + +elemToNAryStyle :: NameSpaces -> Element -> NAryStyle +elemToNAryStyle ns element + | Just narypr <- findChild (QName "naryPr" (lookup "m" ns) (Just "m")) element = + let + chr = findChild (QName "chr" (lookup "m" ns) (Just "m")) narypr >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + Just . head + limLoc = findChild (QName "limLoc" (lookup "m" ns) (Just "m")) narypr >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) + limLoc' = case limLoc of + Just "undOver" -> UnderOver + Just "subSup" -> SubSup + _ -> SubSup + in + NAryStyle { nAryChar = chr, nAryLimLoc = limLoc'} +elemToNAryStyle _ _ = defaultNAryStyle + +elemToDelimStyle :: NameSpaces -> Element -> DelimStyle +elemToDelimStyle ns element + | Just dPr <- findChild (QName "dPr" (lookup "m" ns) (Just "m")) element = + let begChr = findChild (QName "begChr" (lookup "m" ns) (Just "m")) dPr >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + (\c -> if null c then (Just ' ') else (Just $ head c)) + sepChr = findChild (QName "sepChr" (lookup "m" ns) (Just "m")) dPr >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + (\c -> if null c then (Just ' ') else (Just $ head c)) + endChr = findChild (QName "endChr" (lookup "m" ns) (Just "m")) dPr >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + (\c -> if null c then (Just ' ') else (Just $ head c)) + in + DelimStyle { delimBegChar = begChr + , delimSepChar = sepChr + , delimEndChar = endChr} +elemToDelimStyle _ _ = defaultDelimStyle + +elemToGroupStyle :: NameSpaces -> Element -> GroupStyle +elemToGroupStyle ns element + | Just gPr <- findChild (QName "groupChrPr" (lookup "m" ns) (Just "m")) element = + let chr = findChild (QName "chr" (lookup "m" ns) (Just "m")) gPr >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + Just . head + pos = findChild (QName "pos" (lookup "m" ns) (Just "m")) gPr >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + (\s -> Just $ if s == "top" then Top else Bottom) + in + GroupStyle { groupChr = chr, groupPos = pos } +elemToGroupStyle _ _ = defaultGroupStyle + +elemToMathElem :: NameSpaces -> Element -> Maybe OMathElem +elemToMathElem ns element | isElem ns "m" "acc" element = do + let accChar = + findChild (elemName ns "m" "accPr") element >>= + findChild (elemName ns "m" "chr") >>= + findAttr (elemName ns "m" "val") >>= + Just . head + accPr = AccentStyle { accentChar = accChar} + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + return $ Accent accPr base +elemToMathElem ns element | isElem ns "m" "bar" element = do + barPr <- findChild (QName "barPr" (lookup "m" ns) (Just "m")) element >>= + findChild (QName "pos" (lookup "m" ns) (Just "m")) >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + (\s -> + Just $ BarStyle { + barPos = (if s == "bot" then Bottom else Top) + }) + base <- findChild (QName "e" (lookup "m" ns) (Just "m")) element >>= + elemToBase ns + return $ Bar barPr base +elemToMathElem ns element | isElem ns "m" "box" element = + findChild (elemName ns "m" "e") element >>= + elemToBase ns >>= + (\b -> return $ Box b) +elemToMathElem ns element | isElem ns "m" "borderBox" element = + findChild (elemName ns "m" "e") element >>= + elemToBase ns >>= + (\b -> return $ BorderBox b) +elemToMathElem ns element | isElem ns "m" "d" element = + let style = elemToDelimStyle ns element + in + return $ Delimiter style $ mapMaybe (elemToBase ns) (elChildren element) +elemToMathElem ns element | isElem ns "m" "eqArr" element = + return $ EquationArray $ mapMaybe (elemToBaseNoAmpersand ns) (elChildren element) +elemToMathElem ns element | isElem ns "m" "f" element = do + num <- findChild (elemName ns "m" "num") element + den <- findChild (elemName ns "m" "den") element + let numElems = mapMaybe (elemToMathElem ns) (elChildren num) + denElems = mapMaybe (elemToMathElem ns) (elChildren den) + return $ Fraction numElems denElems +elemToMathElem ns element | isElem ns "m" "func" element = do + fName <- findChild (elemName ns "m" "fName") element + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + let fnElems = mapMaybe (elemToMathElem ns) (elChildren fName) + return $ Function fnElems base +elemToMathElem ns element | isElem ns "m" "groupChr" element = + let style = elemToGroupStyle ns element + in + findChild (elemName ns "m" "e") element >>= + elemToBase ns >>= + (\b -> return $ Group style b) +elemToMathElem ns element | isElem ns "m" "limLow" element = do + base <- findChild (elemName ns "m" "e") element + >>= elemToBase ns + lim <- findChild (elemName ns "m" "lim") element + let limElems = mapMaybe (elemToMathElem ns) (elChildren lim) + return $ LowerLimit base limElems +elemToMathElem ns element | isElem ns "m" "limUpp" element = do + base <- findChild (elemName ns "m" "e") element + >>= elemToBase ns + lim <- findChild (elemName ns "m" "lim") element + let limElems = mapMaybe (elemToMathElem ns) (elChildren lim) + return $ UpperLimit base limElems +elemToMathElem ns element | isElem ns "m" "m" element = do + let rows = findChildren (elemName ns "m" "mr") element + let bases = mapMaybe (\mr -> mapM (elemToBase ns) (elChildren mr)) rows + return $ Matrix bases +elemToMathElem ns element | isElem ns "m" "nary" element = do + let style = elemToNAryStyle ns element + sub <- findChild (elemName ns "m" "sub") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + sup <- findChild (elemName ns "m" "sup") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + return $ NAry style sub sup base +elemToMathElem ns element | isElem ns "m" "rad" element = do + deg <- findChild (elemName ns "m" "deg") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + return $ Radical deg base +elemToMathElem ns element | isElem ns "m" "phant" element = do + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + return $ Phantom base +elemToMathElem ns element | isElem ns "m" "sPre" element = do + sub <- findChild (elemName ns "m" "sub") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + sup <- findChild (elemName ns "m" "sup") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + return $ PreSubSuper sub sup base +elemToMathElem ns element | isElem ns "m" "sSub" element = do + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + sub <- findChild (elemName ns "m" "sub") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + return $ Sub base sub +elemToMathElem ns element | isElem ns "m" "sSubSup" element = do + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + sub <- findChild (elemName ns "m" "sub") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + sup <- findChild (elemName ns "m" "sup") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + return $ SubSuper base sub sup +elemToMathElem ns element | isElem ns "m" "sSup" element = do + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + sup <- findChild (elemName ns "m" "sup") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + return $ Super base sup +elemToMathElem ns element | isElem ns "m" "r" element = do + let mrPr = elemToOMathRunStyle ns element + mrElems <- elemToOMathRunElems ns element + return $ OMathRun mrPr mrElems +elemToMathElem _ _ = Nothing + +elemToOMathRunElem :: NameSpaces -> Element -> Maybe OMathRunElem +elemToOMathRunElem ns element + | isElem ns "w" "t" element + || isElem ns "m" "t" element + || isElem ns "w" "delText" element = Just $ TextRun $ strContent element + | isElem ns "w" "br" element = Just LnBrk + | isElem ns "w" "tab" element = Just Tab + | otherwise = Nothing + +elemToOMathRunElems :: NameSpaces -> Element -> Maybe [OMathRunElem] +elemToOMathRunElems ns element + | isElem ns "w" "r" element + || isElem ns "m" "r" element = + Just $ mapMaybe (elemToOMathRunElem ns) (elChildren element) +elemToOMathRunElems _ _ = Nothing + +----- And now the TeXMath Creation + +oMathRunElemToString :: OMathRunElem -> String +oMathRunElemToString (TextRun s) = s +oMathRunElemToString (LnBrk) = ['\n'] +oMathRunElemToString (Tab) = ['\t'] + +oMathRunElemsToString :: [OMathRunElem] -> String +oMathRunElemsToString = concatMap oMathRunElemToString + +oMathElemToString :: OMathElem -> String +oMathElemToString (OMathRun _ oMathRunElems) = + oMathRunElemsToString oMathRunElems +oMathElemToString _ = "" + + +oMathToExps :: OMath -> [TM.Exp] +oMathToExps (OMath oMathElems) = concatMap oMathElemToExps oMathElems + +oMathElemToExps :: OMathElem -> [TM.Exp] +oMathElemToExps (Accent style base) = + let baseExp = baseToExp base + chr = case accentChar style of + Just c -> c + Nothing -> '\180' -- default to acute. + in + [TM.EOver False baseExp (TM.ESymbol TM.Accent [chr])] +oMathElemToExps(Bar style base) = + let baseExp = baseToExp base + in + case barPos style of + Top -> [TM.EOver False baseExp (TM.ESymbol TM.Accent "\175")] + Bottom -> [TM.EUnder False baseExp (TM.ESymbol TM.Accent "\818")] +oMathElemToExps (Box base) = [baseToExp base] +oMathElemToExps (BorderBox base) = + -- TODO: This should be "\\boxed" somehow + [baseToExp base] +oMathElemToExps (Delimiter dPr bases) = + let baseExps = map baseToExp bases + inDelimExps = map Right baseExps + beg = fromMaybe '(' (delimBegChar dPr) + end = fromMaybe ')' (delimEndChar dPr) + sep = fromMaybe '|' (delimSepChar dPr) + exps = intersperse (Left [sep]) inDelimExps + in + [TM.EDelimited [beg] [end] exps] +oMathElemToExps (EquationArray bases) = + let baseExps = map (\b -> [baseToExp' b]) bases + in + [TM.EArray [] baseExps] +oMathElemToExps (Fraction num denom) = + let numExp = TM.EGrouped $ concatMap oMathElemToExps num + denExp = TM.EGrouped $ concatMap oMathElemToExps denom + in + [TM.EFraction TM.NormalFrac numExp denExp] +oMathElemToExps (Function fname base) = + -- We need a string for the fname, but omml gives it to us as a + -- series of oMath elems. We're going to filter out the oMathRuns, + -- which should work for us most of the time. + let fnameString = concatMap oMathElemToString fname + baseExp = baseToExp base + in + [TM.EMathOperator fnameString, baseExp] +oMathElemToExps (Group style base) + | Just Top <- groupPos style = + let baseExp = baseToExp base + chr = case groupChr style of + Just c -> c + Nothing -> '\65079' -- default to overbrace + in + [TM.EOver False baseExp (TM.ESymbol TM.Accent [chr])] + | otherwise = + let baseExp = baseToExp base + chr = case groupChr style of + Just c -> c + Nothing -> '\65080' -- default to underbrace + in + [TM.EUnder False baseExp (TM.ESymbol TM.Accent [chr])] +oMathElemToExps (LowerLimit base limElems) = do + let baseExp = baseToExp base + lim = TM.EGrouped $ concatMap oMathElemToExps limElems + in + [TM.EUnder True lim baseExp] +oMathElemToExps (UpperLimit base limElems) = + let baseExp = baseToExp base + lim = TM.EGrouped $ concatMap oMathElemToExps limElems + in + [TM.EOver True lim baseExp] +oMathElemToExps (Matrix bases) = + let rows = map (map baseToExp') bases + in + [TM.EArray [TM.AlignCenter] rows] +oMathElemToExps (NAry style sub sup base) = + let + subExps = concatMap oMathElemToExps sub + supExps = concatMap oMathElemToExps sup + baseExp = baseToExp base + opChar = case nAryChar style of + Just c -> c + -- default to integral + Nothing -> '\8747' + in [ TM.ESubsup + (TM.ESymbol TM.Op [opChar]) + (TM.EGrouped subExps) + (TM.EGrouped supExps) + , baseExp] +oMathElemToExps (Phantom base) = + [TM.EPhantom $ baseToExp base] +oMathElemToExps (Radical degree base) = + let degExps = concatMap oMathElemToExps degree + baseExp = baseToExp base + in + case degExps of + [] -> [TM.ESqrt baseExp] + ds -> [TM.ERoot (TM.EGrouped ds) baseExp] +oMathElemToExps (PreSubSuper sub sup base) = + let subExps = concatMap oMathElemToExps sub + supExps = concatMap oMathElemToExps sup + baseExp = baseToExp base + in [ TM.ESubsup + (TM.EIdentifier "") (TM.EGrouped subExps) (TM.EGrouped supExps) + , baseExp] +oMathElemToExps (Sub base sub) = + let baseExp = baseToExp base + subExps = concatMap oMathElemToExps sub + in + [TM.ESub baseExp (TM.EGrouped subExps)] +oMathElemToExps (SubSuper base sub sup) = + let baseExp = baseToExp base + subExps = concatMap oMathElemToExps sub + supExps = concatMap oMathElemToExps sup + in + [TM.ESubsup baseExp (TM.EGrouped subExps) (TM.EGrouped supExps)] +oMathElemToExps (Super base sup) = + let baseExp = baseToExp base + supExps = concatMap oMathElemToExps sup + in + [TM.ESuper baseExp (TM.EGrouped supExps)] +oMathElemToExps (OMathRun sty elems) + | NoStyle <- oMathRunTextStyle sty = + [TM.EIdentifier $ oMathRunElemsToString elems] + | Nothing <- oMathRunStyleToTextType sty = + [TM.EIdentifier $ oMathRunElemsToString elems] + | Just textType <- oMathRunStyleToTextType sty = + if oMathLit sty + then [TM.EText textType (oMathRunElemsToString elems)] + else [TM.EStyled textType [TM.EIdentifier $ oMathRunElemsToString elems]] +oMathElemToExps (OMathRun _ _) = [] + +oMathRunStyleToTextType :: OMathRunStyle -> Maybe TM.TextType +oMathRunStyleToTextType mrPr + | Normal <- oMathRunTextStyle mrPr = + Just $ TM.TextNormal + | Styled scr sty <- oMathRunTextStyle mrPr + ,Just OBold <- sty + , Just OSansSerif <- scr = + Just $ TM.TextSansSerifBold + | Styled scr sty <- oMathRunTextStyle mrPr + , Just OBoldItalic <- sty + , Just OSansSerif <- scr = + Just $ TM.TextSansSerifBoldItalic + | Styled scr sty <- oMathRunTextStyle mrPr + , Just OBold <- sty + , Just OScript <- scr = + Just $ TM.TextBoldScript + | Styled scr sty <- oMathRunTextStyle mrPr + , Just OBold <- sty + , Just OFraktur <- scr = + Just $ TM.TextBoldFraktur + | Styled scr sty <- oMathRunTextStyle mrPr + , Just OItalic <- sty + , Just OSansSerif <- scr = + Just $ TM.TextSansSerifItalic + | Styled _ sty <- oMathRunTextStyle mrPr + , Just OBold <- sty = + Just $ TM.TextBold + | Styled _ sty <- oMathRunTextStyle mrPr + , Just OItalic <- sty = + Just $ TM.TextItalic + | Styled scr _ <- oMathRunTextStyle mrPr + , Just OMonospace <- scr = + Just $ TM.TextMonospace + | Styled scr _ <- oMathRunTextStyle mrPr + , Just OSansSerif <- scr = + Just $ TM.TextSansSerif + | Styled scr _ <- oMathRunTextStyle mrPr + , Just ODoubleStruck <- scr = + Just $ TM.TextDoubleStruck + | Styled scr _ <- oMathRunTextStyle mrPr + , Just OScript <- scr = + Just $ TM.TextDoubleStruck + | Styled scr _ <- oMathRunTextStyle mrPr + , Just OFraktur <- scr = + Just $ TM.TextFraktur + | Styled _ sty <- oMathRunTextStyle mrPr + , Just OBoldItalic <- sty = + Just $ TM.TextBoldItalic + | otherwise = Nothing + + + +baseToExp :: Base -> TM.Exp +baseToExp b = TM.EGrouped $ baseToExp' b + +-- an ungrouped version of baseToExp +baseToExp' :: Base -> [TM.Exp] +baseToExp' (Base mathElems) = + concatMap oMathElemToExps mathElems + +elemToExps :: NameSpaces -> Element -> Maybe [TM.Exp] +elemToExps ns element = oMathToExps <$> (elemToMath ns element) + diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 96210c31a..3b2e7c5ca 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -36,19 +36,6 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , BodyPart(..) , TblLook(..) , ParPart(..) - , OMath(..) - , OMathElem(..) - , Base(..) - , TopBottom(..) - , AccentStyle(..) - , BarStyle(..) - , NAryStyle(..) - , DelimStyle(..) - , GroupStyle(..) - , OMathRunStyle(..) - , OMathRunTextStyle(..) - , OMathTextScript(..) - , OMathTextStyle(..) , Run(..) , RunElem(..) , Notes @@ -74,6 +61,8 @@ import qualified Text.Pandoc.UTF8 as UTF8 import Control.Monad.Reader import qualified Data.Map as M import Text.Pandoc.Compat.Except +import Text.Pandoc.Readers.Docx.OMath (elemToExps) +import Text.TeXMath (Exp) data ReaderEnv = ReaderEnv { envNotes :: Notes , envNumbering :: Numbering @@ -161,7 +150,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] data BodyPart = Paragraph ParagraphStyle [ParPart] | ListItem ParagraphStyle String String Level [ParPart] | Tbl String TblGrid TblLook [Row] - | OMathPara OMathParaStyle [OMath] + | OMathPara [[Exp]] deriving Show type TblGrid = [Integer] @@ -185,103 +174,13 @@ data ParPart = PlainRun Run | InternalHyperLink Anchor [Run] | ExternalHyperLink URL [Run] | Drawing FilePath B.ByteString - | PlainOMath OMath + | PlainOMath [Exp] deriving Show -data OMath = OMath [OMathElem] - deriving Show - -data OMathElem = Accent AccentStyle Base - | Bar BarStyle Base - | Box Base - | BorderBox Base - | Delimiter DelimStyle [Base] - | EquationArray [Base] - | Fraction [OMathElem] [OMathElem] - | Function [OMathElem] Base - | Group GroupStyle Base - | LowerLimit Base [OMathElem] - | UpperLimit Base [OMathElem] - | Matrix [[Base]] - | NAry NAryStyle [OMathElem] [OMathElem] Base - | Phantom Base - | Radical [OMathElem] Base - | PreSubSuper [OMathElem] [OMathElem] Base - | Sub Base [OMathElem] - | SubSuper Base [OMathElem] [OMathElem] - | Super Base [OMathElem] - | OMathRun OMathRunStyle Run - deriving Show - -data Base = Base [OMathElem] - deriving Show - --- placeholders -type OMathParaStyle = [String] - -data TopBottom = Top | Bottom - deriving Show - -data AccentStyle = AccentStyle { accentChar :: Maybe Char } - deriving Show - -data BarStyle = BarStyle { barPos :: TopBottom} - deriving Show - -data NAryStyle = NAryStyle { nAryChar :: Maybe Char - , nAryLimLoc :: LimLoc} - deriving Show - -data OMathRunStyle = OMathRunStyle { oMathLit :: Bool - , oMathRunTextStyle :: OMathRunTextStyle } - deriving Show - -data OMathRunTextStyle = NoStyle - | Normal - | Styled { oMathScript :: Maybe OMathTextScript - , oMathStyle :: Maybe OMathTextStyle } - deriving Show - -data OMathTextScript = ORoman - | OScript - | OFraktur - | ODoubleStruck - | OSansSerif - | OMonospace - deriving (Show, Eq) - -data OMathTextStyle = OPlain - | OBold - | OItalic - | OBoldItalic - deriving (Show, Eq) - -defaultNAryStyle :: NAryStyle -defaultNAryStyle = NAryStyle { nAryChar = Nothing -- integral, in practice - , nAryLimLoc = SubSup } - -data LimLoc = SubSup | UnderOver deriving Show - -data DelimStyle = DelimStyle { delimBegChar :: Maybe Char - , delimSepChar :: Maybe Char - , delimEndChar :: Maybe Char} - deriving Show - -defaultDelimStyle :: DelimStyle -defaultDelimStyle = DelimStyle { delimBegChar = Nothing - , delimSepChar = Nothing - , delimEndChar = Nothing } - -data GroupStyle = GroupStyle { groupChr :: Maybe Char - , groupPos :: Maybe TopBottom } - deriving Show - -defaultGroupStyle :: GroupStyle -defaultGroupStyle = GroupStyle {groupChr = Nothing, groupPos = Nothing} - data Run = Run RunStyle [RunElem] | Footnote [BodyPart] | Endnote [BodyPart] + | InlineDrawing FilePath B.ByteString deriving Show data RunElem = TextRun String | LnBrk | Tab @@ -576,9 +475,8 @@ elemToBodyPart ns element | isElem ns "w" "p" element , (c:_) <- findChildren (elemName ns "m" "oMathPara") element = do - let style = [] -- placeholder - maths <- mapD (elemToMath ns) (elChildren c) - return $ OMathPara style maths + expsLst <- mapD (\e -> (maybeToD $ elemToExps ns e)) (elChildren c) + return $ OMathPara expsLst elemToBodyPart ns element | isElem ns "w" "p" element , Just (numId, lvl) <- elemToNumInfo ns element = do @@ -614,274 +512,18 @@ elemToBodyPart ns element return $ Tbl caption grid tblLook rows elemToBodyPart _ _ = throwError WrongElem -elemToMath :: NameSpaces -> Element -> D OMath -elemToMath ns element | isElem ns "m" "oMath" element = - mapD (elemToMathElem ns) (elChildren element) >>= - (\es -> return $ OMath es) -elemToMath _ _ = throwError WrongElem - -elemToBase :: NameSpaces -> Element -> D Base -elemToBase ns element | isElem ns "m" "e" element = - mapD (elemToMathElem ns) (elChildren element) >>= - (\es -> return $ Base es) -elemToBase _ _ = throwError WrongElem - --- TODO: The right way to do this is to use the ampersand to break the --- text lines into multiple columns. That's tricky, though, and this --- will get us most of the way for the time being. -filterAmpersand :: OMathElem -> OMathElem -filterAmpersand (OMathRun mrPr (Run wrPr elems)) = - let f (TextRun s) = TextRun $ filter ('&' /=) s - f re = re - in - OMathRun mrPr $ Run wrPr (map f elems) -filterAmpersand e = e - -elemToBaseNoAmpersand :: NameSpaces -> Element -> D Base -elemToBaseNoAmpersand ns element | isElem ns "m" "e" element = - mapD - (\e -> (elemToMathElem ns e >>= (return . filterAmpersand))) - (elChildren element) >>= - (\es -> return $ Base es) -elemToBaseNoAmpersand _ _ = throwError WrongElem - - -elemToOMathRunStyle :: NameSpaces -> Element -> OMathRunStyle -elemToOMathRunStyle ns element = - let lit = - case - findChild (elemName ns "m" "lit") element >>= - findAttr (elemName ns "m" "val") - of - Just "on" -> True - _ -> False - in - OMathRunStyle { oMathLit = lit - , oMathRunTextStyle = (elemToOMathRunTextStyle ns element) - } - -elemToOMathRunTextStyle :: NameSpaces -> Element -> OMathRunTextStyle -elemToOMathRunTextStyle ns element - | Just mrPr <- findChild (elemName ns "m" "rPr") element - , Just _ <- findChild (elemName ns "m" "nor") mrPr = - Normal - | Just mrPr <- findChild (elemName ns "m" "rPr") element = - let scr = - case - findChild (elemName ns "m" "scr") mrPr >>= - findAttr (elemName ns "m" "val") - of - Just "roman" -> Just ORoman - Just "script" -> Just OScript - Just "fraktur" -> Just OFraktur - Just "double-struck" -> Just ODoubleStruck - Just "sans-serif" -> Just OSansSerif - Just "monospace" -> Just OMonospace - _ -> Nothing - - sty = - case - findChild (elemName ns "m" "sty") mrPr >>= - findAttr (elemName ns "m" "val") - of - Just "p" -> Just OPlain - Just "b" -> Just OBold - Just "i" -> Just OItalic - Just "bi" -> Just OBoldItalic - _ -> Nothing - in - Styled { oMathScript = scr, oMathStyle = sty } - | otherwise = NoStyle - - - -elemToNAryStyle :: NameSpaces -> Element -> NAryStyle -elemToNAryStyle ns element - | Just narypr <- findChild (QName "naryPr" (lookup "m" ns) (Just "m")) element = - let - chr = findChild (QName "chr" (lookup "m" ns) (Just "m")) narypr >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - Just . head - limLoc = findChild (QName "limLoc" (lookup "m" ns) (Just "m")) narypr >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) - limLoc' = case limLoc of - Just "undOver" -> UnderOver - Just "subSup" -> SubSup - _ -> SubSup - in - NAryStyle { nAryChar = chr, nAryLimLoc = limLoc'} -elemToNAryStyle _ _ = defaultNAryStyle - -elemToDelimStyle :: NameSpaces -> Element -> DelimStyle -elemToDelimStyle ns element - | Just dPr <- findChild (QName "dPr" (lookup "m" ns) (Just "m")) element = - let begChr = findChild (QName "begChr" (lookup "m" ns) (Just "m")) dPr >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - (\c -> if null c then (Just ' ') else (Just $ head c)) - sepChr = findChild (QName "sepChr" (lookup "m" ns) (Just "m")) dPr >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - (\c -> if null c then (Just ' ') else (Just $ head c)) - endChr = findChild (QName "endChr" (lookup "m" ns) (Just "m")) dPr >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - (\c -> if null c then (Just ' ') else (Just $ head c)) - in - DelimStyle { delimBegChar = begChr - , delimSepChar = sepChr - , delimEndChar = endChr} -elemToDelimStyle _ _ = defaultDelimStyle - -elemToGroupStyle :: NameSpaces -> Element -> GroupStyle -elemToGroupStyle ns element - | Just gPr <- findChild (QName "groupChrPr" (lookup "m" ns) (Just "m")) element = - let chr = findChild (QName "chr" (lookup "m" ns) (Just "m")) gPr >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - Just . head - pos = findChild (QName "pos" (lookup "m" ns) (Just "m")) gPr >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - (\s -> Just $ if s == "top" then Top else Bottom) - in - GroupStyle { groupChr = chr, groupPos = pos } -elemToGroupStyle _ _ = defaultGroupStyle - -elemToMathElem :: NameSpaces -> Element -> D OMathElem -elemToMathElem ns element | isElem ns "m" "acc" element = do - let accChar = - findChild (elemName ns "m" "accPr") element >>= - findChild (elemName ns "m" "chr") >>= - findAttr (elemName ns "m" "val") >>= - Just . head - accPr = AccentStyle { accentChar = accChar} - base <-(maybeToD $ findChild (elemName ns "m" "e") element) >>= - elemToBase ns - return $ Accent accPr base -elemToMathElem ns element | isElem ns "m" "bar" element = do - barPr <- maybeToD $ - findChild (QName "barPr" (lookup "m" ns) (Just "m")) element >>= - findChild (QName "pos" (lookup "m" ns) (Just "m")) >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - (\s -> - Just $ BarStyle { - barPos = (if s == "bot" then Bottom else Top) - }) - base <-maybeToD (findChild (QName "e" (lookup "m" ns) (Just "m")) element) >>= - elemToBase ns - return $ Bar barPr base -elemToMathElem ns element | isElem ns "m" "box" element = - maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns >>= - (\b -> return $ Box b) -elemToMathElem ns element | isElem ns "m" "borderBox" element = - maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns >>= - (\b -> return $ BorderBox b) -elemToMathElem ns element | isElem ns "m" "d" element = - let style = elemToDelimStyle ns element - in - mapD (elemToBase ns) (elChildren element) >>= - (\es -> return $ Delimiter style es) -elemToMathElem ns element | isElem ns "m" "eqArr" element = - mapD (elemToBaseNoAmpersand ns) (elChildren element) >>= - (\es -> return $ EquationArray es) -elemToMathElem ns element | isElem ns "m" "f" element = do - num <- maybeToD $ findChild (elemName ns "m" "num") element - den <- maybeToD $ findChild (elemName ns "m" "den") element - numElems <- mapD (elemToMathElem ns) (elChildren num) - denElems <- mapD (elemToMathElem ns) (elChildren den) - return $ Fraction numElems denElems -elemToMathElem ns element | isElem ns "m" "func" element = do - fName <- maybeToD $ findChild (elemName ns "m" "fName") element - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - fnElems <- mapD (elemToMathElem ns) (elChildren fName) - return $ Function fnElems base -elemToMathElem ns element | isElem ns "m" "groupChr" element = - let style = elemToGroupStyle ns element - in - maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns >>= - (\b -> return $ Group style b) -elemToMathElem ns element | isElem ns "m" "limLow" element = do - base <- maybeToD (findChild (elemName ns "m" "e") element) - >>= elemToBase ns - lim <- maybeToD $ findChild (elemName ns "m" "lim") element - limElems <- mapD (elemToMathElem ns) (elChildren lim) - return $ LowerLimit base limElems -elemToMathElem ns element | isElem ns "m" "limUpp" element = do - base <- maybeToD (findChild (elemName ns "m" "e") element) - >>= elemToBase ns - lim <- maybeToD $ findChild (elemName ns "m" "lim") element - limElems <- mapD (elemToMathElem ns) (elChildren lim) - return $ UpperLimit base limElems -elemToMathElem ns element | isElem ns "m" "m" element = do - let rows = findChildren (elemName ns "m" "mr") element - bases <- mapD (\mr -> mapD (elemToBase ns) (elChildren mr)) rows - return $ Matrix bases -elemToMathElem ns element | isElem ns "m" "nary" element = do - let style = elemToNAryStyle ns element - sub <- maybeToD (findChild (elemName ns "m" "sub") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - sup <- maybeToD (findChild (elemName ns "m" "sup") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - return $ NAry style sub sup base -elemToMathElem ns element | isElem ns "m" "rad" element = do - deg <- maybeToD (findChild (elemName ns "m" "deg") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - return $ Radical deg base -elemToMathElem ns element | isElem ns "m" "phant" element = do - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - return $ Phantom base -elemToMathElem ns element | isElem ns "m" "sPre" element = do - sub <- maybeToD (findChild (elemName ns "m" "sub") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - sup <- maybeToD (findChild (elemName ns "m" "sup") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - return $ PreSubSuper sub sup base -elemToMathElem ns element | isElem ns "m" "sSub" element = do - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - sub <- maybeToD (findChild (elemName ns "m" "sub") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - return $ Sub base sub -elemToMathElem ns element | isElem ns "m" "sSubSup" element = do - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - sub <- maybeToD (findChild (elemName ns "m" "sub") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - sup <- maybeToD (findChild (elemName ns "m" "sup") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - return $ SubSuper base sub sup -elemToMathElem ns element | isElem ns "m" "sSup" element = do - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - sup <- maybeToD (findChild (elemName ns "m" "sup") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - return $ Super base sup -elemToMathElem ns element | isElem ns "m" "r" element = do - let mrPr = elemToOMathRunStyle ns element - wrPr = elemToRunStyle ns element - relems <- elemToRunElems ns element - return $ OMathRun mrPr $ Run wrPr relems -elemToMathElem _ _ = throwError WrongElem - lookupRelationship :: RelId -> [Relationship] -> Maybe Target lookupRelationship relid rels = lookup relid (map (\(Relationship pair) -> pair) rels) -expandDrawingId :: String -> D ParPart +expandDrawingId :: String -> D (FilePath, B.ByteString) expandDrawingId s = do target <- asks (lookupRelationship s . envRelationships) case target of Just filepath -> do bytes <- asks (lookup (combine "word" filepath) . envMedia) case bytes of - Just bs -> return $ Drawing filepath bs + Just bs -> return (filepath, bs) Nothing -> throwError DocxError Nothing -> throwError DocxError @@ -894,7 +536,7 @@ elemToParPart ns element >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) in case drawing of - Just s -> expandDrawingId s + Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp bs) Nothing -> throwError WrongElem elemToParPart ns element | isElem ns "w" "r" element = @@ -933,7 +575,7 @@ elemToParPart ns element Nothing -> ExternalHyperLink "" runs elemToParPart ns element | isElem ns "m" "oMath" element = - elemToMath ns element >>= (return . PlainOMath) + (maybeToD $ elemToExps ns element) >>= (return . PlainOMath) elemToParPart _ _ = throwError WrongElem lookupFootnote :: String -> Notes -> Maybe Element @@ -945,6 +587,17 @@ lookupEndnote s (Notes _ _ ens) = ens >>= (M.lookup s) elemToRun :: NameSpaces -> Element -> D Run elemToRun ns element | isElem ns "w" "r" element + , Just drawingElem <- findChild (elemName ns "w" "drawing") element = + let a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" + drawing = findElement (QName "blip" (Just a_ns) (Just "a")) drawingElem + >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) + in + case drawing of + Just s -> expandDrawingId s >>= + (\(fp, bs) -> return $ InlineDrawing fp bs) + Nothing -> throwError WrongElem +elemToRun ns element + | isElem ns "w" "r" element , Just ref <- findChild (elemName ns "w" "footnoteReference") element , Just fnId <- findAttr (elemName ns "w" "id") ref = do notes <- asks envNotes |