From 404706d29a8f45b43ef2ef13e93d1786dde863a0 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Tue, 16 Jan 2018 10:45:45 -0500 Subject: Docx reader: Parse fldChar tags This will allow us to parse instrTxt inside fldChar tags. --- src/Text/Pandoc/Readers/Docx.hs | 3 ++ src/Text/Pandoc/Readers/Docx/Parse.hs | 86 +++++++++++++++++++++++++++++++++-- 2 files changed, 84 insertions(+), 5 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index e0f32b908..998179d2f 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -446,6 +446,9 @@ parPartToInlines' (PlainOMath exps) = return $ math $ writeTeX exps parPartToInlines' (SmartTag runs) = do smushInlines <$> mapM runToInlines runs +parPartToInlines' (Field _ runs) = do + smushInlines <$> mapM runToInlines runs +parPartToInlines' NullParPart = return mempty isAnchorSpan :: Inline -> Bool isAnchorSpan (Span (_, classes, kvs) _) = diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index b79b39369..b3a0fee8e 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -90,10 +90,19 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes } deriving Show -data ReaderState = ReaderState { stateWarnings :: [String] } +data ReaderState = ReaderState { stateWarnings :: [String] + , stateFldCharState :: FldCharState + } deriving Show -data DocxError = DocxError | WrongElem +data FldCharState = FldCharOpen + | FldCharFieldInfo FieldInfo + | FldCharContent FieldInfo [Run] + | FldCharClosed + deriving (Show) + +data DocxError = DocxError + | WrongElem deriving Show type D = ExceptT DocxError (ReaderT ReaderEnv (State ReaderState)) @@ -265,6 +274,9 @@ data ParPart = PlainRun Run | Chart -- placeholder for now | PlainOMath [Exp] | SmartTag [Run] + | Field FieldInfo [Run] + | NullParPart -- when we need to return nothing, but + -- not because of an error. deriving Show data Run = Run RunStyle [RunElem] @@ -274,6 +286,10 @@ data Run = Run RunStyle [RunElem] | InlineChart -- placeholder deriving Show +data FieldInfo = HyperlinkField URL + | UnknownField + deriving (Show) + data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen deriving Show @@ -328,7 +344,9 @@ archiveToDocxWithWarnings archive = do (styles, parstyles) = archiveToStyles archive rEnv = ReaderEnv notes comments numbering rels media Nothing styles parstyles InDocument - rState = ReaderState { stateWarnings = [] } + rState = ReaderState { stateWarnings = [] + , stateFldCharState = FldCharClosed + } (eitherDoc, st) = runD (archiveToDocument archive) rEnv rState case eitherDoc of Right doc -> Right (Docx doc, stateWarnings st) @@ -736,9 +754,67 @@ elemToParPart ns element , c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart" , Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem = return Chart +{- +The next one is a bit complicated. fldChar fields work by first +having a in a run, then a run with +, then a run, then the +content runs, and finally a run. For +example (omissions and my comments in brackets): + + + [...] + + + + [...] + HYPERLINK [hyperlink url] + + + [...] + + + + [...] + Foundations of Analysis, 2nd Edition + + + [...] + + + +So we do this in a number of steps. If we encounter the fldchar begin +tag, we start open a fldchar state variable (see state above). We add +the instrtext to it as FieldInfo. Then we close that and start adding +the runs when we get to separate. Then when we get to end, we produce +the Field type with approriate FieldInfo and Runs. +-} +elemToParPart ns element + | isElem ns "w" "r" element + , Just fldChar <- findChildByName ns "w" "fldChar" element + , Just fldCharType <- findAttrByName ns "w" "fldCharType" fldChar = do + fldCharState <- gets stateFldCharState + case fldCharState of + FldCharClosed | fldCharType == "begin" -> do + modify $ \st -> st {stateFldCharState = FldCharOpen} + return NullParPart + FldCharFieldInfo info | fldCharType == "separate" -> do + modify $ \st -> st {stateFldCharState = FldCharContent info []} + return NullParPart + FldCharContent info runs | fldCharType == "end" -> do + modify $ \st -> st {stateFldCharState = FldCharClosed} + return $ Field info runs + _ -> throwError WrongElem elemToParPart ns element - | isElem ns "w" "r" element = - elemToRun ns element >>= (\r -> return $ PlainRun r) + | isElem ns "w" "r" element = do + run <- elemToRun ns element + -- we check to see if we have an open FldChar in state that we're + -- recording. + fldCharState <- gets stateFldCharState + case fldCharState of + FldCharContent info runs -> do + modify $ \st -> st{stateFldCharState = FldCharContent info (run : runs)} + return NullParPart + _ -> return $ PlainRun run elemToParPart ns element | Just change <- getTrackedChange ns element = do runs <- mapD (elemToRun ns) (elChildren element) -- cgit v1.2.3