From a7967d1aeff0336ef4fb50175865747a88ec1a17 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 7 Aug 2014 12:20:22 -0400 Subject: Docx reader: Split math out into math module. Could use some cleanup, but this is the first step for getting an OMML reader into TeXMath. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 373 +--------------------------------- 1 file changed, 7 insertions(+), 366 deletions(-) (limited to 'src/Text/Pandoc/Readers/Docx') diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 96210c31a..6411395c9 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -36,19 +36,6 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , BodyPart(..) , TblLook(..) , ParPart(..) - , OMath(..) - , OMathElem(..) - , Base(..) - , TopBottom(..) - , AccentStyle(..) - , BarStyle(..) - , NAryStyle(..) - , DelimStyle(..) - , GroupStyle(..) - , OMathRunStyle(..) - , OMathRunTextStyle(..) - , OMathTextScript(..) - , OMathTextStyle(..) , Run(..) , RunElem(..) , Notes @@ -74,6 +61,8 @@ import qualified Text.Pandoc.UTF8 as UTF8 import Control.Monad.Reader import qualified Data.Map as M import Text.Pandoc.Compat.Except +import Text.Pandoc.Readers.Docx.OMath (elemToExps) +import Text.TeXMath (Exp) data ReaderEnv = ReaderEnv { envNotes :: Notes , envNumbering :: Numbering @@ -161,7 +150,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] data BodyPart = Paragraph ParagraphStyle [ParPart] | ListItem ParagraphStyle String String Level [ParPart] | Tbl String TblGrid TblLook [Row] - | OMathPara OMathParaStyle [OMath] + | OMathPara [[Exp]] deriving Show type TblGrid = [Integer] @@ -185,100 +174,9 @@ data ParPart = PlainRun Run | InternalHyperLink Anchor [Run] | ExternalHyperLink URL [Run] | Drawing FilePath B.ByteString - | PlainOMath OMath + | PlainOMath [Exp] deriving Show -data OMath = OMath [OMathElem] - deriving Show - -data OMathElem = Accent AccentStyle Base - | Bar BarStyle Base - | Box Base - | BorderBox Base - | Delimiter DelimStyle [Base] - | EquationArray [Base] - | Fraction [OMathElem] [OMathElem] - | Function [OMathElem] Base - | Group GroupStyle Base - | LowerLimit Base [OMathElem] - | UpperLimit Base [OMathElem] - | Matrix [[Base]] - | NAry NAryStyle [OMathElem] [OMathElem] Base - | Phantom Base - | Radical [OMathElem] Base - | PreSubSuper [OMathElem] [OMathElem] Base - | Sub Base [OMathElem] - | SubSuper Base [OMathElem] [OMathElem] - | Super Base [OMathElem] - | OMathRun OMathRunStyle Run - deriving Show - -data Base = Base [OMathElem] - deriving Show - --- placeholders -type OMathParaStyle = [String] - -data TopBottom = Top | Bottom - deriving Show - -data AccentStyle = AccentStyle { accentChar :: Maybe Char } - deriving Show - -data BarStyle = BarStyle { barPos :: TopBottom} - deriving Show - -data NAryStyle = NAryStyle { nAryChar :: Maybe Char - , nAryLimLoc :: LimLoc} - deriving Show - -data OMathRunStyle = OMathRunStyle { oMathLit :: Bool - , oMathRunTextStyle :: OMathRunTextStyle } - deriving Show - -data OMathRunTextStyle = NoStyle - | Normal - | Styled { oMathScript :: Maybe OMathTextScript - , oMathStyle :: Maybe OMathTextStyle } - deriving Show - -data OMathTextScript = ORoman - | OScript - | OFraktur - | ODoubleStruck - | OSansSerif - | OMonospace - deriving (Show, Eq) - -data OMathTextStyle = OPlain - | OBold - | OItalic - | OBoldItalic - deriving (Show, Eq) - -defaultNAryStyle :: NAryStyle -defaultNAryStyle = NAryStyle { nAryChar = Nothing -- integral, in practice - , nAryLimLoc = SubSup } - -data LimLoc = SubSup | UnderOver deriving Show - -data DelimStyle = DelimStyle { delimBegChar :: Maybe Char - , delimSepChar :: Maybe Char - , delimEndChar :: Maybe Char} - deriving Show - -defaultDelimStyle :: DelimStyle -defaultDelimStyle = DelimStyle { delimBegChar = Nothing - , delimSepChar = Nothing - , delimEndChar = Nothing } - -data GroupStyle = GroupStyle { groupChr :: Maybe Char - , groupPos :: Maybe TopBottom } - deriving Show - -defaultGroupStyle :: GroupStyle -defaultGroupStyle = GroupStyle {groupChr = Nothing, groupPos = Nothing} - data Run = Run RunStyle [RunElem] | Footnote [BodyPart] | Endnote [BodyPart] @@ -576,9 +474,8 @@ elemToBodyPart ns element | isElem ns "w" "p" element , (c:_) <- findChildren (elemName ns "m" "oMathPara") element = do - let style = [] -- placeholder - maths <- mapD (elemToMath ns) (elChildren c) - return $ OMathPara style maths + expsLst <- mapD (\e -> (maybeToD $ elemToExps ns e)) (elChildren c) + return $ OMathPara expsLst elemToBodyPart ns element | isElem ns "w" "p" element , Just (numId, lvl) <- elemToNumInfo ns element = do @@ -614,262 +511,6 @@ elemToBodyPart ns element return $ Tbl caption grid tblLook rows elemToBodyPart _ _ = throwError WrongElem -elemToMath :: NameSpaces -> Element -> D OMath -elemToMath ns element | isElem ns "m" "oMath" element = - mapD (elemToMathElem ns) (elChildren element) >>= - (\es -> return $ OMath es) -elemToMath _ _ = throwError WrongElem - -elemToBase :: NameSpaces -> Element -> D Base -elemToBase ns element | isElem ns "m" "e" element = - mapD (elemToMathElem ns) (elChildren element) >>= - (\es -> return $ Base es) -elemToBase _ _ = throwError WrongElem - --- TODO: The right way to do this is to use the ampersand to break the --- text lines into multiple columns. That's tricky, though, and this --- will get us most of the way for the time being. -filterAmpersand :: OMathElem -> OMathElem -filterAmpersand (OMathRun mrPr (Run wrPr elems)) = - let f (TextRun s) = TextRun $ filter ('&' /=) s - f re = re - in - OMathRun mrPr $ Run wrPr (map f elems) -filterAmpersand e = e - -elemToBaseNoAmpersand :: NameSpaces -> Element -> D Base -elemToBaseNoAmpersand ns element | isElem ns "m" "e" element = - mapD - (\e -> (elemToMathElem ns e >>= (return . filterAmpersand))) - (elChildren element) >>= - (\es -> return $ Base es) -elemToBaseNoAmpersand _ _ = throwError WrongElem - - -elemToOMathRunStyle :: NameSpaces -> Element -> OMathRunStyle -elemToOMathRunStyle ns element = - let lit = - case - findChild (elemName ns "m" "lit") element >>= - findAttr (elemName ns "m" "val") - of - Just "on" -> True - _ -> False - in - OMathRunStyle { oMathLit = lit - , oMathRunTextStyle = (elemToOMathRunTextStyle ns element) - } - -elemToOMathRunTextStyle :: NameSpaces -> Element -> OMathRunTextStyle -elemToOMathRunTextStyle ns element - | Just mrPr <- findChild (elemName ns "m" "rPr") element - , Just _ <- findChild (elemName ns "m" "nor") mrPr = - Normal - | Just mrPr <- findChild (elemName ns "m" "rPr") element = - let scr = - case - findChild (elemName ns "m" "scr") mrPr >>= - findAttr (elemName ns "m" "val") - of - Just "roman" -> Just ORoman - Just "script" -> Just OScript - Just "fraktur" -> Just OFraktur - Just "double-struck" -> Just ODoubleStruck - Just "sans-serif" -> Just OSansSerif - Just "monospace" -> Just OMonospace - _ -> Nothing - - sty = - case - findChild (elemName ns "m" "sty") mrPr >>= - findAttr (elemName ns "m" "val") - of - Just "p" -> Just OPlain - Just "b" -> Just OBold - Just "i" -> Just OItalic - Just "bi" -> Just OBoldItalic - _ -> Nothing - in - Styled { oMathScript = scr, oMathStyle = sty } - | otherwise = NoStyle - - - -elemToNAryStyle :: NameSpaces -> Element -> NAryStyle -elemToNAryStyle ns element - | Just narypr <- findChild (QName "naryPr" (lookup "m" ns) (Just "m")) element = - let - chr = findChild (QName "chr" (lookup "m" ns) (Just "m")) narypr >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - Just . head - limLoc = findChild (QName "limLoc" (lookup "m" ns) (Just "m")) narypr >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) - limLoc' = case limLoc of - Just "undOver" -> UnderOver - Just "subSup" -> SubSup - _ -> SubSup - in - NAryStyle { nAryChar = chr, nAryLimLoc = limLoc'} -elemToNAryStyle _ _ = defaultNAryStyle - -elemToDelimStyle :: NameSpaces -> Element -> DelimStyle -elemToDelimStyle ns element - | Just dPr <- findChild (QName "dPr" (lookup "m" ns) (Just "m")) element = - let begChr = findChild (QName "begChr" (lookup "m" ns) (Just "m")) dPr >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - (\c -> if null c then (Just ' ') else (Just $ head c)) - sepChr = findChild (QName "sepChr" (lookup "m" ns) (Just "m")) dPr >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - (\c -> if null c then (Just ' ') else (Just $ head c)) - endChr = findChild (QName "endChr" (lookup "m" ns) (Just "m")) dPr >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - (\c -> if null c then (Just ' ') else (Just $ head c)) - in - DelimStyle { delimBegChar = begChr - , delimSepChar = sepChr - , delimEndChar = endChr} -elemToDelimStyle _ _ = defaultDelimStyle - -elemToGroupStyle :: NameSpaces -> Element -> GroupStyle -elemToGroupStyle ns element - | Just gPr <- findChild (QName "groupChrPr" (lookup "m" ns) (Just "m")) element = - let chr = findChild (QName "chr" (lookup "m" ns) (Just "m")) gPr >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - Just . head - pos = findChild (QName "pos" (lookup "m" ns) (Just "m")) gPr >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - (\s -> Just $ if s == "top" then Top else Bottom) - in - GroupStyle { groupChr = chr, groupPos = pos } -elemToGroupStyle _ _ = defaultGroupStyle - -elemToMathElem :: NameSpaces -> Element -> D OMathElem -elemToMathElem ns element | isElem ns "m" "acc" element = do - let accChar = - findChild (elemName ns "m" "accPr") element >>= - findChild (elemName ns "m" "chr") >>= - findAttr (elemName ns "m" "val") >>= - Just . head - accPr = AccentStyle { accentChar = accChar} - base <-(maybeToD $ findChild (elemName ns "m" "e") element) >>= - elemToBase ns - return $ Accent accPr base -elemToMathElem ns element | isElem ns "m" "bar" element = do - barPr <- maybeToD $ - findChild (QName "barPr" (lookup "m" ns) (Just "m")) element >>= - findChild (QName "pos" (lookup "m" ns) (Just "m")) >>= - findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= - (\s -> - Just $ BarStyle { - barPos = (if s == "bot" then Bottom else Top) - }) - base <-maybeToD (findChild (QName "e" (lookup "m" ns) (Just "m")) element) >>= - elemToBase ns - return $ Bar barPr base -elemToMathElem ns element | isElem ns "m" "box" element = - maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns >>= - (\b -> return $ Box b) -elemToMathElem ns element | isElem ns "m" "borderBox" element = - maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns >>= - (\b -> return $ BorderBox b) -elemToMathElem ns element | isElem ns "m" "d" element = - let style = elemToDelimStyle ns element - in - mapD (elemToBase ns) (elChildren element) >>= - (\es -> return $ Delimiter style es) -elemToMathElem ns element | isElem ns "m" "eqArr" element = - mapD (elemToBaseNoAmpersand ns) (elChildren element) >>= - (\es -> return $ EquationArray es) -elemToMathElem ns element | isElem ns "m" "f" element = do - num <- maybeToD $ findChild (elemName ns "m" "num") element - den <- maybeToD $ findChild (elemName ns "m" "den") element - numElems <- mapD (elemToMathElem ns) (elChildren num) - denElems <- mapD (elemToMathElem ns) (elChildren den) - return $ Fraction numElems denElems -elemToMathElem ns element | isElem ns "m" "func" element = do - fName <- maybeToD $ findChild (elemName ns "m" "fName") element - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - fnElems <- mapD (elemToMathElem ns) (elChildren fName) - return $ Function fnElems base -elemToMathElem ns element | isElem ns "m" "groupChr" element = - let style = elemToGroupStyle ns element - in - maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns >>= - (\b -> return $ Group style b) -elemToMathElem ns element | isElem ns "m" "limLow" element = do - base <- maybeToD (findChild (elemName ns "m" "e") element) - >>= elemToBase ns - lim <- maybeToD $ findChild (elemName ns "m" "lim") element - limElems <- mapD (elemToMathElem ns) (elChildren lim) - return $ LowerLimit base limElems -elemToMathElem ns element | isElem ns "m" "limUpp" element = do - base <- maybeToD (findChild (elemName ns "m" "e") element) - >>= elemToBase ns - lim <- maybeToD $ findChild (elemName ns "m" "lim") element - limElems <- mapD (elemToMathElem ns) (elChildren lim) - return $ UpperLimit base limElems -elemToMathElem ns element | isElem ns "m" "m" element = do - let rows = findChildren (elemName ns "m" "mr") element - bases <- mapD (\mr -> mapD (elemToBase ns) (elChildren mr)) rows - return $ Matrix bases -elemToMathElem ns element | isElem ns "m" "nary" element = do - let style = elemToNAryStyle ns element - sub <- maybeToD (findChild (elemName ns "m" "sub") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - sup <- maybeToD (findChild (elemName ns "m" "sup") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - return $ NAry style sub sup base -elemToMathElem ns element | isElem ns "m" "rad" element = do - deg <- maybeToD (findChild (elemName ns "m" "deg") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - return $ Radical deg base -elemToMathElem ns element | isElem ns "m" "phant" element = do - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - return $ Phantom base -elemToMathElem ns element | isElem ns "m" "sPre" element = do - sub <- maybeToD (findChild (elemName ns "m" "sub") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - sup <- maybeToD (findChild (elemName ns "m" "sup") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - return $ PreSubSuper sub sup base -elemToMathElem ns element | isElem ns "m" "sSub" element = do - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - sub <- maybeToD (findChild (elemName ns "m" "sub") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - return $ Sub base sub -elemToMathElem ns element | isElem ns "m" "sSubSup" element = do - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - sub <- maybeToD (findChild (elemName ns "m" "sub") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - sup <- maybeToD (findChild (elemName ns "m" "sup") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - return $ SubSuper base sub sup -elemToMathElem ns element | isElem ns "m" "sSup" element = do - base <- maybeToD (findChild (elemName ns "m" "e") element) >>= - elemToBase ns - sup <- maybeToD (findChild (elemName ns "m" "sup") element) >>= - (\e -> mapD (elemToMathElem ns) (elChildren e)) - return $ Super base sup -elemToMathElem ns element | isElem ns "m" "r" element = do - let mrPr = elemToOMathRunStyle ns element - wrPr = elemToRunStyle ns element - relems <- elemToRunElems ns element - return $ OMathRun mrPr $ Run wrPr relems -elemToMathElem _ _ = throwError WrongElem - lookupRelationship :: RelId -> [Relationship] -> Maybe Target lookupRelationship relid rels = lookup relid (map (\(Relationship pair) -> pair) rels) @@ -933,7 +574,7 @@ elemToParPart ns element Nothing -> ExternalHyperLink "" runs elemToParPart ns element | isElem ns "m" "oMath" element = - elemToMath ns element >>= (return . PlainOMath) + (maybeToD $ elemToExps ns element) >>= (return . PlainOMath) elemToParPart _ _ = throwError WrongElem lookupFootnote :: String -> Notes -> Maybe Element -- cgit v1.2.3 From d293dd528b7f5f97a6edde6a7ae9381b36828f60 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Thu, 7 Aug 2014 12:41:33 -0400 Subject: OMath module: Add new file. --- src/Text/Pandoc/Readers/Docx/OMath.hs | 622 ++++++++++++++++++++++++++++++++++ 1 file changed, 622 insertions(+) create mode 100644 src/Text/Pandoc/Readers/Docx/OMath.hs (limited to 'src/Text/Pandoc/Readers/Docx') diff --git a/src/Text/Pandoc/Readers/Docx/OMath.hs b/src/Text/Pandoc/Readers/Docx/OMath.hs new file mode 100644 index 000000000..309aaefe8 --- /dev/null +++ b/src/Text/Pandoc/Readers/Docx/OMath.hs @@ -0,0 +1,622 @@ +{-# LANGUAGE PatternGuards #-} + +{- +Copyright (C) 2014 Jesse Rosenthal + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Docx.Math + Copyright : Copyright (C) 2014 Jesse Rosenthal + License : GNU GPL, version 2 or above + + Maintainer : Jesse Rosenthal + Stability : alpha + Portability : portable + +Types and functions for conversion of OMML into TeXMath. +-} + +module Text.Pandoc.Readers.Docx.OMath ( elemToExps + ) where + +import Text.XML.Light +import Data.Maybe (mapMaybe, fromMaybe) +import Data.List (intersperse) +import qualified Text.TeXMath.Types as TM +import Control.Applicative ((<$>)) + +type NameSpaces = [(String, String)] + +elemName :: NameSpaces -> String -> String -> QName +elemName ns prefix name = (QName name (lookup prefix ns) (Just prefix)) + +isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem ns prefix name element = + qName (elName element) == name && + qURI (elName element) == (lookup prefix ns) + + +data OMath = OMath [OMathElem] + deriving Show + +data OMathElem = Accent AccentStyle Base + | Bar BarStyle Base + | Box Base + | BorderBox Base + | Delimiter DelimStyle [Base] + | EquationArray [Base] + | Fraction [OMathElem] [OMathElem] + | Function [OMathElem] Base + | Group GroupStyle Base + | LowerLimit Base [OMathElem] + | UpperLimit Base [OMathElem] + | Matrix [[Base]] + | NAry NAryStyle [OMathElem] [OMathElem] Base + | Phantom Base + | Radical [OMathElem] Base + | PreSubSuper [OMathElem] [OMathElem] Base + | Sub Base [OMathElem] + | SubSuper Base [OMathElem] [OMathElem] + | Super Base [OMathElem] + | OMathRun OMathRunStyle [OMathRunElem] + deriving Show + +data OMathRunElem = TextRun String + | LnBrk + | Tab + deriving Show + +data Base = Base [OMathElem] + deriving Show + +data TopBottom = Top | Bottom + deriving Show + +data AccentStyle = AccentStyle { accentChar :: Maybe Char } + deriving Show + +data BarStyle = BarStyle { barPos :: TopBottom} + deriving Show + +data NAryStyle = NAryStyle { nAryChar :: Maybe Char + , nAryLimLoc :: LimLoc} + deriving Show + +data OMathRunStyle = OMathRunStyle { oMathLit :: Bool + , oMathRunTextStyle :: OMathRunTextStyle } + deriving Show + +data OMathRunTextStyle = NoStyle + | Normal + | Styled { oMathScript :: Maybe OMathTextScript + , oMathStyle :: Maybe OMathTextStyle } + deriving Show + +data OMathTextScript = ORoman + | OScript + | OFraktur + | ODoubleStruck + | OSansSerif + | OMonospace + deriving (Show, Eq) + +data OMathTextStyle = OPlain + | OBold + | OItalic + | OBoldItalic + deriving (Show, Eq) + +defaultNAryStyle :: NAryStyle +defaultNAryStyle = NAryStyle { nAryChar = Nothing -- integral, in practice + , nAryLimLoc = SubSup } + +data LimLoc = SubSup | UnderOver deriving Show + +data DelimStyle = DelimStyle { delimBegChar :: Maybe Char + , delimSepChar :: Maybe Char + , delimEndChar :: Maybe Char} + deriving Show + +defaultDelimStyle :: DelimStyle +defaultDelimStyle = DelimStyle { delimBegChar = Nothing + , delimSepChar = Nothing + , delimEndChar = Nothing } + +data GroupStyle = GroupStyle { groupChr :: Maybe Char + , groupPos :: Maybe TopBottom } + deriving Show + +defaultGroupStyle :: GroupStyle +defaultGroupStyle = GroupStyle {groupChr = Nothing, groupPos = Nothing} + +elemToMath :: NameSpaces -> Element -> Maybe OMath +elemToMath ns element | isElem ns "m" "oMath" element = + Just $ OMath $ mapMaybe (elemToMathElem ns) (elChildren element) +elemToMath _ _ = Nothing + +elemToBase :: NameSpaces -> Element -> Maybe Base +elemToBase ns element | isElem ns "m" "e" element = + Just $ Base $ mapMaybe (elemToMathElem ns) (elChildren element) +elemToBase _ _ = Nothing + +-- TODO: The right way to do this is to use the ampersand to break the +-- text lines into multiple columns. That's tricky, though, and this +-- will get us most of the way for the time being. +filterAmpersand :: OMathElem -> OMathElem +filterAmpersand (OMathRun mrPr elems) = + let f (TextRun s) = TextRun $ filter ('&' /=) s + f re = re + in + OMathRun mrPr (map f elems) +filterAmpersand e = e + +elemToBaseNoAmpersand :: NameSpaces -> Element -> Maybe Base +elemToBaseNoAmpersand ns element | isElem ns "m" "e" element = + return $ Base $ + mapMaybe + (\e -> (elemToMathElem ns e >>= (return . filterAmpersand))) + (elChildren element) +elemToBaseNoAmpersand _ _ = Nothing + +elemToOMathRunStyle :: NameSpaces -> Element -> OMathRunStyle +elemToOMathRunStyle ns element = + let lit = + case + findChild (elemName ns "m" "lit") element >>= + findAttr (elemName ns "m" "val") + of + Just "on" -> True + _ -> False + in + OMathRunStyle { oMathLit = lit + , oMathRunTextStyle = (elemToOMathRunTextStyle ns element) + } + +elemToOMathRunTextStyle :: NameSpaces -> Element -> OMathRunTextStyle +elemToOMathRunTextStyle ns element + | Just mrPr <- findChild (elemName ns "m" "rPr") element + , Just _ <- findChild (elemName ns "m" "nor") mrPr = + Normal + | Just mrPr <- findChild (elemName ns "m" "rPr") element = + let scr = + case + findChild (elemName ns "m" "scr") mrPr >>= + findAttr (elemName ns "m" "val") + of + Just "roman" -> Just ORoman + Just "script" -> Just OScript + Just "fraktur" -> Just OFraktur + Just "double-struck" -> Just ODoubleStruck + Just "sans-serif" -> Just OSansSerif + Just "monospace" -> Just OMonospace + _ -> Nothing + + sty = + case + findChild (elemName ns "m" "sty") mrPr >>= + findAttr (elemName ns "m" "val") + of + Just "p" -> Just OPlain + Just "b" -> Just OBold + Just "i" -> Just OItalic + Just "bi" -> Just OBoldItalic + _ -> Nothing + in + Styled { oMathScript = scr, oMathStyle = sty } + | otherwise = NoStyle + + + +elemToNAryStyle :: NameSpaces -> Element -> NAryStyle +elemToNAryStyle ns element + | Just narypr <- findChild (QName "naryPr" (lookup "m" ns) (Just "m")) element = + let + chr = findChild (QName "chr" (lookup "m" ns) (Just "m")) narypr >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + Just . head + limLoc = findChild (QName "limLoc" (lookup "m" ns) (Just "m")) narypr >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) + limLoc' = case limLoc of + Just "undOver" -> UnderOver + Just "subSup" -> SubSup + _ -> SubSup + in + NAryStyle { nAryChar = chr, nAryLimLoc = limLoc'} +elemToNAryStyle _ _ = defaultNAryStyle + +elemToDelimStyle :: NameSpaces -> Element -> DelimStyle +elemToDelimStyle ns element + | Just dPr <- findChild (QName "dPr" (lookup "m" ns) (Just "m")) element = + let begChr = findChild (QName "begChr" (lookup "m" ns) (Just "m")) dPr >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + (\c -> if null c then (Just ' ') else (Just $ head c)) + sepChr = findChild (QName "sepChr" (lookup "m" ns) (Just "m")) dPr >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + (\c -> if null c then (Just ' ') else (Just $ head c)) + endChr = findChild (QName "endChr" (lookup "m" ns) (Just "m")) dPr >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + (\c -> if null c then (Just ' ') else (Just $ head c)) + in + DelimStyle { delimBegChar = begChr + , delimSepChar = sepChr + , delimEndChar = endChr} +elemToDelimStyle _ _ = defaultDelimStyle + +elemToGroupStyle :: NameSpaces -> Element -> GroupStyle +elemToGroupStyle ns element + | Just gPr <- findChild (QName "groupChrPr" (lookup "m" ns) (Just "m")) element = + let chr = findChild (QName "chr" (lookup "m" ns) (Just "m")) gPr >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + Just . head + pos = findChild (QName "pos" (lookup "m" ns) (Just "m")) gPr >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + (\s -> Just $ if s == "top" then Top else Bottom) + in + GroupStyle { groupChr = chr, groupPos = pos } +elemToGroupStyle _ _ = defaultGroupStyle + +elemToMathElem :: NameSpaces -> Element -> Maybe OMathElem +elemToMathElem ns element | isElem ns "m" "acc" element = do + let accChar = + findChild (elemName ns "m" "accPr") element >>= + findChild (elemName ns "m" "chr") >>= + findAttr (elemName ns "m" "val") >>= + Just . head + accPr = AccentStyle { accentChar = accChar} + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + return $ Accent accPr base +elemToMathElem ns element | isElem ns "m" "bar" element = do + barPr <- findChild (QName "barPr" (lookup "m" ns) (Just "m")) element >>= + findChild (QName "pos" (lookup "m" ns) (Just "m")) >>= + findAttr (QName "val" (lookup "m" ns) (Just "m")) >>= + (\s -> + Just $ BarStyle { + barPos = (if s == "bot" then Bottom else Top) + }) + base <- findChild (QName "e" (lookup "m" ns) (Just "m")) element >>= + elemToBase ns + return $ Bar barPr base +elemToMathElem ns element | isElem ns "m" "box" element = + findChild (elemName ns "m" "e") element >>= + elemToBase ns >>= + (\b -> return $ Box b) +elemToMathElem ns element | isElem ns "m" "borderBox" element = + findChild (elemName ns "m" "e") element >>= + elemToBase ns >>= + (\b -> return $ BorderBox b) +elemToMathElem ns element | isElem ns "m" "d" element = + let style = elemToDelimStyle ns element + in + return $ Delimiter style $ mapMaybe (elemToBase ns) (elChildren element) +elemToMathElem ns element | isElem ns "m" "eqArr" element = + return $ EquationArray $ mapMaybe (elemToBaseNoAmpersand ns) (elChildren element) +elemToMathElem ns element | isElem ns "m" "f" element = do + num <- findChild (elemName ns "m" "num") element + den <- findChild (elemName ns "m" "den") element + let numElems = mapMaybe (elemToMathElem ns) (elChildren num) + denElems = mapMaybe (elemToMathElem ns) (elChildren den) + return $ Fraction numElems denElems +elemToMathElem ns element | isElem ns "m" "func" element = do + fName <- findChild (elemName ns "m" "fName") element + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + let fnElems = mapMaybe (elemToMathElem ns) (elChildren fName) + return $ Function fnElems base +elemToMathElem ns element | isElem ns "m" "groupChr" element = + let style = elemToGroupStyle ns element + in + findChild (elemName ns "m" "e") element >>= + elemToBase ns >>= + (\b -> return $ Group style b) +elemToMathElem ns element | isElem ns "m" "limLow" element = do + base <- findChild (elemName ns "m" "e") element + >>= elemToBase ns + lim <- findChild (elemName ns "m" "lim") element + let limElems = mapMaybe (elemToMathElem ns) (elChildren lim) + return $ LowerLimit base limElems +elemToMathElem ns element | isElem ns "m" "limUpp" element = do + base <- findChild (elemName ns "m" "e") element + >>= elemToBase ns + lim <- findChild (elemName ns "m" "lim") element + let limElems = mapMaybe (elemToMathElem ns) (elChildren lim) + return $ UpperLimit base limElems +elemToMathElem ns element | isElem ns "m" "m" element = do + let rows = findChildren (elemName ns "m" "mr") element + let bases = mapMaybe (\mr -> mapM (elemToBase ns) (elChildren mr)) rows + return $ Matrix bases +elemToMathElem ns element | isElem ns "m" "nary" element = do + let style = elemToNAryStyle ns element + sub <- findChild (elemName ns "m" "sub") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + sup <- findChild (elemName ns "m" "sup") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + return $ NAry style sub sup base +elemToMathElem ns element | isElem ns "m" "rad" element = do + deg <- findChild (elemName ns "m" "deg") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + return $ Radical deg base +elemToMathElem ns element | isElem ns "m" "phant" element = do + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + return $ Phantom base +elemToMathElem ns element | isElem ns "m" "sPre" element = do + sub <- findChild (elemName ns "m" "sub") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + sup <- findChild (elemName ns "m" "sup") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + return $ PreSubSuper sub sup base +elemToMathElem ns element | isElem ns "m" "sSub" element = do + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + sub <- findChild (elemName ns "m" "sub") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + return $ Sub base sub +elemToMathElem ns element | isElem ns "m" "sSubSup" element = do + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + sub <- findChild (elemName ns "m" "sub") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + sup <- findChild (elemName ns "m" "sup") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + return $ SubSuper base sub sup +elemToMathElem ns element | isElem ns "m" "sSup" element = do + base <- findChild (elemName ns "m" "e") element >>= + elemToBase ns + sup <- findChild (elemName ns "m" "sup") element >>= + (\e -> return $ mapMaybe (elemToMathElem ns) (elChildren e)) + return $ Super base sup +elemToMathElem ns element | isElem ns "m" "r" element = do + let mrPr = elemToOMathRunStyle ns element + mrElems <- elemToOMathRunElems ns element + return $ OMathRun mrPr mrElems +elemToMathElem _ _ = Nothing + +elemToOMathRunElem :: NameSpaces -> Element -> Maybe OMathRunElem +elemToOMathRunElem ns element + | isElem ns "w" "t" element + || isElem ns "m" "t" element + || isElem ns "w" "delText" element = Just $ TextRun $ strContent element + | isElem ns "w" "br" element = Just LnBrk + | isElem ns "w" "tab" element = Just Tab + | otherwise = Nothing + +elemToOMathRunElems :: NameSpaces -> Element -> Maybe [OMathRunElem] +elemToOMathRunElems ns element + | isElem ns "w" "r" element + || isElem ns "m" "r" element = + Just $ mapMaybe (elemToOMathRunElem ns) (elChildren element) +elemToOMathRunElems _ _ = Nothing + +----- And now the TeXMath Creation + +oMathRunElemToString :: OMathRunElem -> String +oMathRunElemToString (TextRun s) = s +oMathRunElemToString (LnBrk) = ['\n'] +oMathRunElemToString (Tab) = ['\t'] + +oMathRunElemsToString :: [OMathRunElem] -> String +oMathRunElemsToString = concatMap oMathRunElemToString + +oMathElemToString :: OMathElem -> String +oMathElemToString (OMathRun _ oMathRunElems) = + oMathRunElemsToString oMathRunElems +oMathElemToString _ = "" + + +oMathToExps :: OMath -> [TM.Exp] +oMathToExps (OMath oMathElems) = concatMap oMathElemToExps oMathElems + +oMathElemToExps :: OMathElem -> [TM.Exp] +oMathElemToExps (Accent style base) = + let baseExp = baseToExp base + chr = case accentChar style of + Just c -> c + Nothing -> '\180' -- default to acute. + in + [TM.EOver False baseExp (TM.ESymbol TM.Accent [chr])] +oMathElemToExps(Bar style base) = + let baseExp = baseToExp base + in + case barPos style of + Top -> [TM.EOver False baseExp (TM.ESymbol TM.Accent "\175")] + Bottom -> [TM.EUnder False baseExp (TM.ESymbol TM.Accent "\818")] +oMathElemToExps (Box base) = [baseToExp base] +oMathElemToExps (BorderBox base) = + -- TODO: This should be "\\boxed" somehow + [baseToExp base] +oMathElemToExps (Delimiter dPr bases) = + let baseExps = map baseToExp bases + inDelimExps = map Right baseExps + beg = fromMaybe '(' (delimBegChar dPr) + end = fromMaybe ')' (delimEndChar dPr) + sep = fromMaybe '|' (delimSepChar dPr) + exps = intersperse (Left [sep]) inDelimExps + in + [TM.EDelimited [beg] [end] exps] +oMathElemToExps (EquationArray bases) = + let baseExps = map (\b -> [baseToExp' b]) bases + in + [TM.EArray [] baseExps] +oMathElemToExps (Fraction num denom) = + let numExp = TM.EGrouped $ concatMap oMathElemToExps num + denExp = TM.EGrouped $ concatMap oMathElemToExps denom + in + [TM.EFraction TM.NormalFrac numExp denExp] +oMathElemToExps (Function fname base) = + -- We need a string for the fname, but omml gives it to us as a + -- series of oMath elems. We're going to filter out the oMathRuns, + -- which should work for us most of the time. + let fnameString = concatMap oMathElemToString fname + baseExp = baseToExp base + in + [TM.EMathOperator fnameString, baseExp] +oMathElemToExps (Group style base) + | Just Top <- groupPos style = + let baseExp = baseToExp base + chr = case groupChr style of + Just c -> c + Nothing -> '\65079' -- default to overbrace + in + [TM.EOver False baseExp (TM.ESymbol TM.Accent [chr])] + | otherwise = + let baseExp = baseToExp base + chr = case groupChr style of + Just c -> c + Nothing -> '\65080' -- default to underbrace + in + [TM.EUnder False baseExp (TM.ESymbol TM.Accent [chr])] +oMathElemToExps (LowerLimit base limElems) = do + let baseExp = baseToExp base + lim = TM.EGrouped $ concatMap oMathElemToExps limElems + in + [TM.EUnder True lim baseExp] +oMathElemToExps (UpperLimit base limElems) = + let baseExp = baseToExp base + lim = TM.EGrouped $ concatMap oMathElemToExps limElems + in + [TM.EOver True lim baseExp] +oMathElemToExps (Matrix bases) = + let rows = map (map baseToExp') bases + in + [TM.EArray [TM.AlignCenter] rows] +oMathElemToExps (NAry style sub sup base) = + let + subExps = concatMap oMathElemToExps sub + supExps = concatMap oMathElemToExps sup + baseExp = baseToExp base + opChar = case nAryChar style of + Just c -> c + -- default to integral + Nothing -> '\8747' + in [ TM.ESubsup + (TM.ESymbol TM.Op [opChar]) + (TM.EGrouped subExps) + (TM.EGrouped supExps) + , baseExp] +oMathElemToExps (Phantom base) = + [TM.EPhantom $ baseToExp base] +oMathElemToExps (Radical degree base) = + let degExps = concatMap oMathElemToExps degree + baseExp = baseToExp base + in + case degExps of + [] -> [TM.ESqrt baseExp] + ds -> [TM.ERoot (TM.EGrouped ds) baseExp] +oMathElemToExps (PreSubSuper sub sup base) = + let subExps = concatMap oMathElemToExps sub + supExps = concatMap oMathElemToExps sup + baseExp = baseToExp base + in [ TM.ESubsup + (TM.EIdentifier "") (TM.EGrouped subExps) (TM.EGrouped supExps) + , baseExp] +oMathElemToExps (Sub base sub) = + let baseExp = baseToExp base + subExps = concatMap oMathElemToExps sub + in + [TM.ESub baseExp (TM.EGrouped subExps)] +oMathElemToExps (SubSuper base sub sup) = + let baseExp = baseToExp base + subExps = concatMap oMathElemToExps sub + supExps = concatMap oMathElemToExps sup + in + [TM.ESubsup baseExp (TM.EGrouped subExps) (TM.EGrouped supExps)] +oMathElemToExps (Super base sup) = + let baseExp = baseToExp base + supExps = concatMap oMathElemToExps sup + in + [TM.ESuper baseExp (TM.EGrouped supExps)] +oMathElemToExps (OMathRun sty elems) + | NoStyle <- oMathRunTextStyle sty = + [TM.EIdentifier $ oMathRunElemsToString elems] + | Nothing <- oMathRunStyleToTextType sty = + [TM.EIdentifier $ oMathRunElemsToString elems] + | Just textType <- oMathRunStyleToTextType sty = + if oMathLit sty + then [TM.EText textType (oMathRunElemsToString elems)] + else [TM.EStyled textType [TM.EIdentifier $ oMathRunElemsToString elems]] +oMathElemToExps (OMathRun _ _) = [] + +oMathRunStyleToTextType :: OMathRunStyle -> Maybe TM.TextType +oMathRunStyleToTextType mrPr + | Normal <- oMathRunTextStyle mrPr = + Just $ TM.TextNormal + | Styled scr sty <- oMathRunTextStyle mrPr + ,Just OBold <- sty + , Just OSansSerif <- scr = + Just $ TM.TextSansSerifBold + | Styled scr sty <- oMathRunTextStyle mrPr + , Just OBoldItalic <- sty + , Just OSansSerif <- scr = + Just $ TM.TextSansSerifBoldItalic + | Styled scr sty <- oMathRunTextStyle mrPr + , Just OBold <- sty + , Just OScript <- scr = + Just $ TM.TextBoldScript + | Styled scr sty <- oMathRunTextStyle mrPr + , Just OBold <- sty + , Just OFraktur <- scr = + Just $ TM.TextBoldFraktur + | Styled scr sty <- oMathRunTextStyle mrPr + , Just OItalic <- sty + , Just OSansSerif <- scr = + Just $ TM.TextSansSerifItalic + | Styled _ sty <- oMathRunTextStyle mrPr + , Just OBold <- sty = + Just $ TM.TextBold + | Styled _ sty <- oMathRunTextStyle mrPr + , Just OItalic <- sty = + Just $ TM.TextItalic + | Styled scr _ <- oMathRunTextStyle mrPr + , Just OMonospace <- scr = + Just $ TM.TextMonospace + | Styled scr _ <- oMathRunTextStyle mrPr + , Just OSansSerif <- scr = + Just $ TM.TextSansSerif + | Styled scr _ <- oMathRunTextStyle mrPr + , Just ODoubleStruck <- scr = + Just $ TM.TextDoubleStruck + | Styled scr _ <- oMathRunTextStyle mrPr + , Just OScript <- scr = + Just $ TM.TextDoubleStruck + | Styled scr _ <- oMathRunTextStyle mrPr + , Just OFraktur <- scr = + Just $ TM.TextFraktur + | Styled _ sty <- oMathRunTextStyle mrPr + , Just OBoldItalic <- sty = + Just $ TM.TextBoldItalic + | otherwise = Nothing + + + +baseToExp :: Base -> TM.Exp +baseToExp b = TM.EGrouped $ baseToExp' b + +-- an ungrouped version of baseToExp +baseToExp' :: Base -> [TM.Exp] +baseToExp' (Base mathElems) = + concatMap oMathElemToExps mathElems + +elemToExps :: NameSpaces -> Element -> Maybe [TM.Exp] +elemToExps ns element = oMathToExps <$> (elemToMath ns element) + -- cgit v1.2.3