From 11945ea5ec0a9ed42b0c24f6f9ecff69e86784a1 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Sun, 3 Nov 2019 21:25:56 -0500 Subject: Docx reader: Only use LTR when it is overriding BiDi setting The left-to-right direction setting in docx is used in the spec only for overriding an explicit right-to-left setting. We only process it when it happens in a paragraph set with BiDi. This is especially important for docs exported from Google Docs, which explicitly (and unnecessarily) set "rtl=0" for every paragraph. Closes: #5723 --- src/Text/Pandoc/Readers/Docx.hs | 12 ++++++++++-- src/Text/Pandoc/Readers/Docx/Parse.hs | 3 +++ src/Text/Pandoc/Readers/Docx/Parse/Styles.hs | 1 + 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 8b0b295ab..387c3c7e2 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -124,10 +124,11 @@ instance Default DState where data DEnv = DEnv { docxOptions :: ReaderOptions , docxInHeaderBlock :: Bool + , docxInBidi :: Bool } instance Default DEnv where - def = DEnv def False + def = DEnv def False False type DocxContext m = ReaderT DEnv (StateT DState m) @@ -290,7 +291,10 @@ runStyleToTransform rPr return $ spanWith ("",[],[("dir","rtl")]) . transform | Just False <- isRTL rPr = do transform <- runStyleToTransform rPr{isRTL = Nothing} - return $ spanWith ("",[],[("dir","ltr")]) . transform + inBidi <- asks docxInBidi + return $ if inBidi + then spanWith ("",[],[("dir","ltr")]) . transform + else transform | Just SupScrpt <- rVertAlign rPr = do transform <- runStyleToTransform rPr{rVertAlign = Nothing} return $ superscript . transform @@ -561,6 +565,10 @@ normalizeToClassName = map go . fromStyleName bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks bodyPartToBlocks (Paragraph pPr parparts) + | Just True <- pBidi pPr = do + let pPr' = pPr { pBidi = Nothing } + local (\s -> s{ docxInBidi = True }) + (bodyPartToBlocks (Paragraph pPr' parparts)) | isCodeDiv pPr = do transform <- parStyleToTransform pPr return $ diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 8c5c94bb9..889bd80fc 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -224,6 +224,7 @@ data ParagraphStyle = ParagraphStyle { pStyle :: [ParStyle] , indentation :: Maybe ParIndentation , dropCap :: Bool , pChange :: Maybe TrackedChange + , pBidi :: Maybe Bool } deriving Show @@ -232,6 +233,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] , indentation = Nothing , dropCap = False , pChange = Nothing + , pBidi = Just False } @@ -998,6 +1000,7 @@ elemToParagraphStyle ns element sty isElem ns "w" "moveFrom" e ) >>= getTrackedChange ns + , pBidi = checkOnOff ns pPr (elemName ns "w" "bidi") } elemToParagraphStyle _ _ _ = defaultParagraphStyle diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index c2d27e7fb..ac2d6fa07 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -40,6 +40,7 @@ module Text.Pandoc.Readers.Docx.Parse.Styles ( , getNumInfo , elemToRunStyle , defaultRunStyle + , checkOnOff ) where import Prelude import Codec.Archive.Zip -- cgit v1.2.3