From 0f98cbff4b61b8e79f386f77d18b3218f1214b25 Mon Sep 17 00:00:00 2001 From: Milan Bracke Date: Fri, 1 Oct 2021 11:34:14 +0200 Subject: Avoid blockquote when parent style has more indent When a paragraph has an indentation different from the parent (named) style, it used to be considered a blockquote. But this only makes sense when the paragraph has more indentation. So this commit adds a check for the indentation of the parent style. --- src/Text/Pandoc/Readers/Docx.hs | 66 ++++++++++++++-------------- src/Text/Pandoc/Readers/Docx/Parse.hs | 27 +++--------- src/Text/Pandoc/Readers/Docx/Parse/Styles.hs | 26 +++++++++++ 3 files changed, 66 insertions(+), 53 deletions(-) (limited to 'src/Text/Pandoc') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index dd6f39431..66cd84291 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -535,34 +535,36 @@ trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils extraAttr :: (Eq (StyleName a), HasStyleName a) => a -> Attr extraAttr s = ("", [], [("custom-style", fromStyleName $ getStyleName s)]) -parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks) -parStyleToTransform pPr = case pStyle pPr of - c@(getStyleName -> styleName):cs - | styleName `elem` divsToKeep -> do - let pPr' = pPr { pStyle = cs } - transform <- parStyleToTransform pPr' - return $ divWith ("", [normalizeToClassName styleName], []) . transform - | styleName `elem` listParagraphStyles -> do - let pPr' = pPr { pStyle = cs, indentation = Nothing} - transform <- parStyleToTransform pPr' - return $ divWith ("", [normalizeToClassName styleName], []) . transform - | otherwise -> do - let pPr' = pPr { pStyle = cs } - transform <- parStyleToTransform pPr' - styles <- asks (isEnabled Ext_styles . docxOptions) - return $ - (if styles then divWith (extraAttr c) else id) - . (if isBlockQuote c then blockQuote else id) - . transform - [] - | Just left <- indentation pPr >>= leftParIndent -> do - let pPr' = pPr { indentation = Nothing } - hang = fromMaybe 0 $ indentation pPr >>= hangingParIndent - transform <- parStyleToTransform pPr' - return $ if (left - hang) > 0 - then blockQuote . transform - else transform - | otherwise -> return id +paragraphStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks) +paragraphStyleToTransform pPr = + let stylenames = map getStyleName (pStyle pPr) + transform = if (`elem` listParagraphStyles) `any` stylenames || relativeIndent pPr <= 0 + then id + else blockQuote + in do + extStylesEnabled <- asks (isEnabled Ext_styles . docxOptions) + return $ foldr (\parStyle transform' -> + (parStyleToTransform extStylesEnabled parStyle) . transform' + ) transform (pStyle pPr) + +parStyleToTransform :: Bool -> ParStyle -> Blocks -> Blocks +parStyleToTransform extStylesEnabled parStyle@(getStyleName -> styleName) + | (styleName `elem` divsToKeep) || (styleName `elem` listParagraphStyles) = + divWith ("", [normalizeToClassName styleName], []) + | otherwise = + (if extStylesEnabled then divWith (extraAttr parStyle) else id) + . (if isBlockQuote parStyle then blockQuote else id) + +-- The relative indent is the indentation minus the indentation of the parent style. +-- This tells us whether this paragraph in particular was indented more and thus +-- should be considered a block quote. +relativeIndent :: ParagraphStyle -> Integer +relativeIndent pPr = + let pStyleLeft = fromMaybe 0 $ pStyleIndentation pPr >>= leftParIndent + pStyleHang = fromMaybe 0 $ pStyleIndentation pPr >>= hangingParIndent + left = fromMaybe pStyleLeft $ indentation pPr >>= leftParIndent + hang = fromMaybe pStyleHang $ indentation pPr >>= hangingParIndent + in (left - hang) - (pStyleLeft - pStyleHang) normalizeToClassName :: (FromStyleName a) => a -> T.Text normalizeToClassName = T.map go . fromStyleName @@ -581,7 +583,7 @@ bodyPartToBlocks (Paragraph pPr parparts) local (\s -> s{ docxInBidi = True }) (bodyPartToBlocks (Paragraph pPr' parparts)) | isCodeDiv pPr = do - transform <- parStyleToTransform pPr + transform <- paragraphStyleToTransform pPr return $ transform $ codeBlock $ @@ -608,7 +610,7 @@ bodyPartToBlocks (Paragraph pPr parparts) else prevParaIls <> space) <> ils' handleInsertion = do modify $ \s -> s {docxPrevPara = mempty} - transform <- parStyleToTransform pPr' + transform <- paragraphStyleToTransform pPr' return $ transform $ paraOrPlain ils'' opts <- asks docxOptions case (pChange pPr', readerTrackChanges opts) of @@ -623,7 +625,7 @@ bodyPartToBlocks (Paragraph pPr parparts) , AllChanges) -> do let attr = ("", ["paragraph-insertion"], addAuthorAndDate cAuthor cDate) insertMark = spanWith attr mempty - transform <- parStyleToTransform pPr' + transform <- paragraphStyleToTransform pPr' return $ transform $ paraOrPlain $ ils'' <> insertMark (Just (TrackedChange Deletion _), AcceptChanges) -> do @@ -635,7 +637,7 @@ bodyPartToBlocks (Paragraph pPr parparts) , AllChanges) -> do let attr = ("", ["paragraph-deletion"], addAuthorAndDate cAuthor cDate) insertMark = spanWith attr mempty - transform <- parStyleToTransform pPr' + transform <- paragraphStyleToTransform pPr' return $ transform $ paraOrPlain $ ils'' <> insertMark _ -> handleInsertion diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 5f29ac41a..e4d3ea6f8 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -50,6 +50,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , archiveToDocxWithWarnings , getStyleNames , pHeading + , pStyleIndentation , constructBogusParStyleData , leftBiasedMergeRunStyle , rowsToRowspans @@ -194,11 +195,6 @@ data Notes = Notes NameSpaces data Comments = Comments NameSpaces (M.Map T.Text Element) deriving Show -data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer - , rightParIndent :: Maybe Integer - , hangingParIndent :: Maybe Integer} - deriving Show - data ChangeType = Insertion | Deletion deriving Show @@ -439,6 +435,7 @@ getStyleNames = fmap getStyleName constructBogusParStyleData :: ParaStyleName -> ParStyle constructBogusParStyleData stName = ParStyle { headingLev = Nothing + , indent = Nothing , numInfo = Nothing , psParentStyle = Nothing , pStyleName = stName @@ -673,20 +670,6 @@ elemToCell ns element | isElem ns "w" "tc" element = return $ Cell (fromMaybe 1 gridSpan) vMerge cellContents elemToCell _ _ = throwError WrongElem -elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation -elemToParIndentation ns element | isElem ns "w" "ind" element = - Just ParIndentation { - leftParIndent = - findAttrByName ns "w" "left" element >>= - stringToInteger - , rightParIndent = - findAttrByName ns "w" "right" element >>= - stringToInteger - , hangingParIndent = - findAttrByName ns "w" "hanging" element >>= - stringToInteger } -elemToParIndentation _ _ = Nothing - testBitMask :: Text -> Int -> Bool testBitMask bitMaskS n = case (reads ("0x" ++ T.unpack bitMaskS) :: [(Int, String)]) of @@ -699,6 +682,9 @@ pHeading = getParStyleField headingLev . pStyle pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text) pNumInfo = getParStyleField numInfo . pStyle +pStyleIndentation :: ParagraphStyle -> Maybe ParIndentation +pStyleIndentation style = (getParStyleField indent . pStyle) style + elemToBodyPart :: NameSpaces -> Element -> D BodyPart elemToBodyPart ns element | isElem ns "w" "p" element @@ -1086,8 +1072,7 @@ elemToParagraphStyle ns element sty in ParagraphStyle {pStyle = mapMaybe (`M.lookup` sty) style , indentation = - findChildByName ns "w" "ind" pPr >>= - elemToParIndentation ns + getIndentation ns element , dropCap = case findChildByName ns "w" "framePr" pPr >>= diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index 0d7271d6a..bb28b3009 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -21,6 +21,7 @@ module Text.Pandoc.Readers.Docx.Parse.Styles ( , CharStyle , ParaStyleId(..) , ParStyle(..) + , ParIndentation(..) , RunStyle(..) , HasStyleName , StyleName @@ -37,6 +38,7 @@ module Text.Pandoc.Readers.Docx.Parse.Styles ( , fromStyleName , fromStyleId , stringToInteger + , getIndentation , getNumInfo , elemToRunStyle , defaultRunStyle @@ -115,7 +117,13 @@ data RunStyle = RunStyle { isBold :: Maybe Bool } deriving Show +data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer + , rightParIndent :: Maybe Integer + , hangingParIndent :: Maybe Integer} + deriving Show + data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int) + , indent :: Maybe ParIndentation , numInfo :: Maybe (T.Text, T.Text) , psParentStyle :: Maybe ParStyle , pStyleName :: ParaStyleName @@ -290,6 +298,23 @@ getHeaderLevel ns element , n > 0 = Just (styleName, fromInteger n) getHeaderLevel _ _ = Nothing +getIndentation :: NameSpaces -> Element -> Maybe ParIndentation +getIndentation ns el = do + indElement <- findChildByName ns "w" "pPr" el >>= + findChildByName ns "w" "ind" + return $ ParIndentation + { + leftParIndent = findAttrByName ns "w" "left" indElement <|> + findAttrByName ns "w" "start" indElement >>= + stringToInteger + , rightParIndent = findAttrByName ns "w" "right" indElement <|> + findAttrByName ns "w" "end" indElement >>= + stringToInteger + , hangingParIndent = (findAttrByName ns "w" "hanging" indElement >>= stringToInteger) <|> + fmap negate + (findAttrByName ns "w" "firstLine" indElement >>= stringToInteger) + } + getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a getElementStyleName ns el = coerce <$> ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") @@ -314,6 +339,7 @@ elemToParStyleData ns element parentStyle = Just $ ParStyle { headingLev = getHeaderLevel ns element + , indent = getIndentation ns element , numInfo = getNumInfo ns element , psParentStyle = parentStyle , pStyleName = styleName -- cgit v1.2.3