From 7fd48b30e0a6e1e3c02a2b66c76118d10c02636f Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 09:30:30 -0400 Subject: Docx reader: Fix hdr handling in block norm `blockNormalize` previously forgot to account for the case in which a Header's inlines did not start with a space. --- src/Text/Pandoc/Readers/Docx.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 9c1d0c5e6..84d50a396 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -157,6 +157,8 @@ blockNormalize (Para (Space : ils)) = blockNormalize (Para ils) blockNormalize (Para ils) = Para $ strNormalize ils blockNormalize (Header n attr (Space : ils)) = blockNormalize $ Header n attr ils +blockNormalize (Header n attr ils) = + Header n attr $ strNormalize ils blockNormalize (Table (Space : ils) align width hdr cells) = blockNormalize $ Table ils align width hdr cells blockNormalize (Table ils align width hdr cells) = -- cgit v1.2.3 From 3da515bdb005cf16589b88d80aa4a8a71760e366 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 10:12:28 -0400 Subject: Docx reader: simplify blockNormalize Use a function `stripSpaces`, instead of recursion. Makes it a bit easier to read and maintain, and simplifies normalizing DefinitionList, which was left out the first time.
--- src/Text/Pandoc/Readers/Docx.hs | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 84d50a396..08afc94e6 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -150,19 +150,17 @@ strNormalize (Str "" : ils) = strNormalize ils strNormalize ((Str s) : (Str s') : l) = strNormalize ((Str (s++s')) : l) strNormalize (il:ils) = il : (strNormalize ils) +stripSpaces :: [Inline] -> [Inline] +stripSpaces ils = + reverse $ dropWhile (Space ==) $ reverse $ dropWhile (Space ==) ils + blockNormalize :: Block -> Block -blockNormalize (Plain (Space : ils)) = blockNormalize (Plain ils) -blockNormalize (Plain ils) = Plain $ strNormalize ils -blockNormalize (Para (Space : ils)) = blockNormalize (Para ils) -blockNormalize (Para ils) = Para $ strNormalize ils -blockNormalize (Header n attr (Space : ils)) = - blockNormalize $ Header n attr ils +blockNormalize (Plain ils) = Plain $ strNormalize $ stripSpaces ils +blockNormalize (Para ils) = Para $ strNormalize $ stripSpaces ils blockNormalize (Header n attr ils) = - Header n attr $ strNormalize ils -blockNormalize (Table (Space : ils) align width hdr cells) = - blockNormalize $ Table ils align width hdr cells + Header n attr $ strNormalize $ stripSpaces ils blockNormalize (Table ils align width hdr cells) = - Table (strNormalize ils) align width hdr cells + Table (strNormalize $ stripSpaces ils) align width hdr cells blockNormalize blk = blk runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] -- cgit v1.2.3 From 03af19a7e12ff3a7f0a396ebed73c6c17f12ad07 Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 10:16:32 -0400 Subject: Docx Reader: Normalize DefinitionLists Previously DefinitionList had been left out of `blockNormalize`. Now it is included. 
--- src/Text/Pandoc/Readers/Docx.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 08afc94e6..e62cf6f0e 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -161,6 +161,8 @@ blockNormalize (Header n attr ils) = Header n attr $ strNormalize $ stripSpaces ils blockNormalize (Table ils align width hdr cells) = Table (strNormalize $ stripSpaces ils) align width hdr cells +blockNormalize (DefinitionList pairs) = + DefinitionList $ map (\(ils, blklsts) -> (strNormalize (stripSpaces ils), blklsts)) pairs blockNormalize blk = blk runToInlines :: ReaderOptions -> Docx -> Run -> [Inline] -- cgit v1.2.3 From 2aa5f58c5b82dd5750e1bf5f30e1936d132104ac Mon Sep 17 00:00:00 2001 From: Jesse Rosenthal Date: Fri, 20 Jun 2014 10:27:18 -0400 Subject: Docx reader: Add a comment explaining strNormalize `normalize` from Text.Pandoc.Shared is more general. In tests, though, it more than doubles the run time. `strNormalize` does less, but it does what we need. This comment is added for future maintainability. --- src/Text/Pandoc/Readers/Docx.hs | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index e62cf6f0e..8a8bc46a6 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -144,6 +144,10 @@ runElemToString (Tab) = ['\t'] runElemsToString :: [RunElem] -> String runElemsToString = concatMap runElemToString +--- We use this instead of the more general +--- Text.Pandoc.Shared.normalize for reasons of efficiency. For +--- whatever reason, `normalize` makes a run take almost twice as +--- long. (It does more, but this does what we need) strNormalize :: [Inline] -> [Inline] strNormalize [] = [] strNormalize (Str "" : ils) = strNormalize ils -- cgit v1.2.3