diff options
-rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 24 |
1 files changed, 15 insertions, 9 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 9c1d0c5e6..8a8bc46a6 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -144,23 +144,29 @@ runElemToString (Tab) = ['\t'] runElemsToString :: [RunElem] -> String runElemsToString = concatMap runElemToString +--- We use this instead of the more general +--- Text.Pandoc.Shared.normalize for reasons of efficiency. For +--- whatever reason, `normalize` makes a run take almost twice as +--- long. (It does more, but this does what we need) strNormalize :: [Inline] -> [Inline] strNormalize [] = [] strNormalize (Str "" : ils) = strNormalize ils strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) strNormalize (il:ils) = il : (strNormalize ils) +stripSpaces :: [Inline] -> [Inline] +stripSpaces ils = + reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils + blockNormalize :: Block -> Block -blockNormalize (Plain (Space : ils)) = blockNormalize (Plain ils) -blockNormalize (Plain ils) = Plain $ strNormalize ils -blockNormalize (Para (Space : ils)) = blockNormalize (Para ils) -blockNormalize (Para ils) = Para $ strNormalize ils -blockNormalize (Header n attr (Space : ils)) = - blockNormalize $ Header n attr ils -blockNormalize (Table (Space : ils) align width hdr cells) = - blockNormalize $ Table ils align width hdr cells +blockNormalize (Plain ils) = Plain $ strNormalize $ stripSpaces ils +blockNormalize (Para ils) = Para $ strNormalize $ stripSpaces ils +blockNormalize (Header n attr ils) = + Header n attr $ strNormalize $ stripSpaces ils blockNormalize (Table ils align width hdr cells) = - Table (strNormalize ils) align width hdr cells + Table (strNormalize $ stripSpaces ils) align width hdr cells +blockNormalize (DefinitionList pairs) = + DefinitionList $ map (\(ils, blklsts) -> (strNormalize (stripSpaces ils), blklsts)) pairs blockNormalize blk = blk runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] |