From b1eba3c65ca2981c78b66d1ee9218f3dfeda89c7 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Sun, 29 Jun 2014 08:14:36 -0400
Subject: Docx Reader: Update state properly

Previously, a fresh state was created for the purpose of updating. In
the future, when there is more than one field in the state, this
obviously won't work.
---
 src/Text/Pandoc/Readers/Docx.hs | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'src/Text/Pandoc')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 71baa5dde..2e10ae3a4 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -110,6 +110,11 @@ data DEnv = DEnv { docxOptions  :: ReaderOptions
 
 type DocxContext = ReaderT DEnv (State DState)
 
+updateDState :: (DState -> DState) -> DocxContext ()
+updateDState f = do
+  st <- get
+  put $ f st
+
 evalDocxContext :: DocxContext a -> DEnv -> DState -> a
 evalDocxContext ctx env st = evalState (runReaderT ctx env) st
 
@@ -289,7 +294,7 @@ parPartToInlines (BookMark _ anchor) =
     let newAnchor = case anchor `elem` (M.elems anchorMap) of
           True -> uniqueIdent [Str anchor] (M.elems anchorMap)
           False -> anchor
-    put DState{ docxAnchorMap = M.insert anchor newAnchor anchorMap}
+    updateDState $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap}
     return [Span (anchor, ["anchor"], []) []]
 parPartToInlines (Drawing relid) = do
   (Docx _ _ _ rels _) <- asks docxDocument
@@ -329,7 +334,7 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils)
     do
       hdrIDMap <- gets docxAnchorMap
       let newIdent = uniqueIdent ils (M.elems hdrIDMap)
-      put DState{docxAnchorMap = M.insert ident newIdent hdrIDMap}
+      updateDState $ \s -> s {docxAnchorMap = M.insert ident newIdent hdrIDMap}
       return $ Header n (newIdent, classes, kvs) (ils \\ (x:xs))
 -- Otherwise we just give it a name, and register that name (associate
 -- it with itself.)
@@ -337,7 +342,7 @@ makeHeaderAnchor (Header n (_, classes, kvs) ils) =
   do
     hdrIDMap <- gets docxAnchorMap
     let newIdent = uniqueIdent ils (M.elems hdrIDMap)
-    put DState{docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
+    updateDState $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
     return $ Header n (newIdent, classes, kvs) ils
 makeHeaderAnchor blk = return blk
 
-- 
cgit v1.2.3


From 0587334bc0c87ae62f37cf14543b7727d408aeca Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Sun, 29 Jun 2014 16:38:51 -0400
Subject: Docx writer: insert bookmark tags inside <w:p> tag.

This makes the header anchors in pandoc-generated ooxml match those
generated by word.
---
 src/Text/Pandoc/Writers/Docx.hs | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'src/Text/Pandoc')

diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index 31e64f14e..4b787b023 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -514,8 +514,11 @@ blockToOpenXML :: WriterOptions -> Block -> WS [Element]
 blockToOpenXML _ Null = return []
 blockToOpenXML opts (Div _ bs) = blocksToOpenXML opts bs
 blockToOpenXML opts (Header lev (ident,_,_) lst) = do
-  contents <- withParaProp (pStyle $ "Heading" ++ show lev) $
-               blockToOpenXML opts (Para lst)
+
+  paraProps <- withParaProp (pStyle $ "Heading" ++ show lev) $
+               getParaProps False
+  contents <- inlinesToOpenXML opts lst
+
   usedIdents <- gets stSectionIds
   let bookmarkName = if null ident
                         then uniqueIdent lst usedIdents
@@ -525,7 +528,7 @@ blockToOpenXML opts (Header lev (ident,_,_) lst) = do
   let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id')
                                                ,("w:name",bookmarkName)] ()
   let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] ()
-  return $ [bookmarkStart] ++ contents ++ [bookmarkEnd]
+  return [mknode "w:p" [] (paraProps ++ [bookmarkStart, bookmarkEnd] ++ contents)]
 blockToOpenXML opts (Plain lst) = withParaProp (pStyle "Compact")
   $ blockToOpenXML opts (Para lst)
 -- title beginning with fig: indicates that the image is a figure
-- 
cgit v1.2.3


From c0fcc8a7891892357854cf498ce262b256fac1ca Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Sun, 29 Jun 2014 18:44:22 -0400
Subject: Docx reader: Add ParIndentation type to parser.

This lets us keep more information about the indentation, and act
accordingly in the reader.
---
 src/Text/Pandoc/Readers/Docx/Parse.hs | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

(limited to 'src/Text/Pandoc')

diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 07f34450d..537c5c272 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -42,6 +42,7 @@ module Text.Pandoc.Readers.Docx.Parse (  Docx(..)
                                        , Relationship
                                        , Media
                                        , RunStyle(..)
+                                       , ParIndentation(..)
                                        , ParagraphStyle(..)
                                        , Row(..)
                                        , Cell(..)
@@ -341,16 +342,37 @@ testBitMask bitMaskS n =
     []            -> False
     ((n', _) : _) -> ((n' .|. n) /= 0)
 
+data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
+                                     , rightParIndent :: Maybe Integer
+                                     , hangingParIndent :: Maybe Integer}
+                      deriving Show
+
 data ParagraphStyle = ParagraphStyle { pStyle :: [String]
-                                     , indent :: Maybe Integer
+                                     , indentation :: Maybe ParIndentation
                                      }
                       deriving Show
 
 defaultParagraphStyle :: ParagraphStyle
 defaultParagraphStyle = ParagraphStyle { pStyle = []
-                                       , indent = Nothing
+                                       , indentation = Nothing
                                        }
 
+elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation
+elemToParIndentation ns element
+  | qName (elName element) == "ind" &&
+    qURI (elName element) == (lookup "w" ns) =
+      Just $ ParIndentation {
+        leftParIndent =
+           findAttr (QName "left" (lookup "w" ns) (Just "w")) element >>=
+           stringToInteger
+        , rightParIndent = 
+          findAttr (QName "right" (lookup "w" ns) (Just "w")) element >>=
+          stringToInteger
+        , hangingParIndent = 
+          findAttr (QName "hanging" (lookup "w" ns) (Just "w")) element >>=
+          stringToInteger}
+elemToParIndentation _ _ = Nothing
+
 elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle
 elemToParagraphStyle ns element =
   case findChild (QName "pPr" (lookup "w" ns) (Just "w")) element of
@@ -360,10 +382,9 @@ elemToParagraphStyle ns element =
           mapMaybe
           (findAttr (QName "val" (lookup "w" ns) (Just "w")))
           (findChildren (QName "pStyle" (lookup "w" ns) (Just "w")) pPr)
-      , indent =
+      , indentation =
         findChild (QName "ind" (lookup "w" ns) (Just "w")) pPr >>=
-        findAttr (QName "left" (lookup "w" ns) (Just "w")) >>=
-        stringToInteger
+        elemToParIndentation ns
         }
     Nothing -> defaultParagraphStyle
 
-- 
cgit v1.2.3


From 0f59196e0ef2f0977d404699ae16a48009fa7632 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Sun, 29 Jun 2014 22:50:29 -0400
Subject: Docx reader: Make use of new ParIndentation info.

Here, when hanging indents are greater than or equal to left indents, we
don't set it to block quote. Such indents are frequently used in
academic bibliographies. (Thanks to Caleb McDaniel.)
---
 src/Text/Pandoc/Readers/Docx.hs | 35 +++++++++++++++++++++++++----------
 1 file changed, 25 insertions(+), 10 deletions(-)

(limited to 'src/Text/Pandoc')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 71baa5dde..8357b4cca 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -94,6 +94,7 @@ import System.FilePath (combine)
 import qualified Data.Map as M
 import Control.Monad.Reader
 import Control.Monad.State
+import Control.Applicative (liftA2)
 
 readDocx :: ReaderOptions
          -> B.ByteString
@@ -148,7 +149,6 @@ runStyleToContainers rPr =
   in
    classContainers ++ formatters
 
-
 divAttrToContainers :: [String] -> [(String, String)] -> [Container Block]
 divAttrToContainers (c:cs) _ | Just n <- isHeaderClass c =
   [Container $ \_ ->
@@ -166,22 +166,37 @@ divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs =
 divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs =
   (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs)
 divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs
-divAttrToContainers [] kvs | Just numString <- lookup "indent" kvs =
-  let kvs' = filter (\(k,_) -> k /= "indent") kvs
+divAttrToContainers [] kvs | Just _ <- lookup "indent" kvs
+                           , Just flInd <- lookup "first-line-indent" kvs =
+  let
+    kvs' = filter (\(k,_) -> notElem k ["indent", "first-line-indent"]) kvs
   in
-   case numString of
-     "0"       -> divAttrToContainers [] kvs'
-     ('-' : _) -> divAttrToContainers [] kvs'
-     _         -> (Container BlockQuote) : divAttrToContainers [] kvs'
+   case flInd of
+     "0" -> divAttrToContainers [] kvs'
+     ('-':_) -> divAttrToContainers [] kvs'
+     _       -> (Container BlockQuote) : divAttrToContainers [] kvs'
+divAttrToContainers [] kvs | Just ind <- lookup "indent" kvs =
+  let
+    kvs' = filter (\(k,_) -> notElem k ["indent"]) kvs
+  in
+   case ind of
+     "0" -> divAttrToContainers [] kvs'
+     ('-':_) -> divAttrToContainers [] kvs'
+     _       -> (Container BlockQuote) : divAttrToContainers [] kvs'
+
 divAttrToContainers _ _ = []
 
 
 parStyleToContainers :: ParagraphStyle -> [Container Block]
 parStyleToContainers pPr =
   let classes = pStyle pPr
-      kvs = case indent pPr of
-        Just n -> [("indent", show n)]
-        Nothing -> []
+      indent = indentation pPr >>= leftParIndent
+      hanging = indentation pPr >>= hangingParIndent
+      firstLineIndent = liftA2 (-) indent hanging
+      kvs = mapMaybe id
+            [ indent >>= (\n -> Just ("indent", show n)),
+              firstLineIndent >>= (\n -> Just ("first-line-indent", show n))
+              ]
   in
    divAttrToContainers classes kvs
   
-- 
cgit v1.2.3


From 3fbbafd391334429df49255160ace17245409e41 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sun, 29 Jun 2014 23:03:12 -0700
Subject: Rewrote normalize for efficiency. (Closes #1385.)

* Added normalizeInlines, normalizeBlocks.
* Type signature is now more narrow, `Pandoc -> Pandoc` instead of
  `Data a :: a -> a`.  Some users may need to change their uses of
  `normalize` to the newly exported `normalizeInlines` or
  `normalizeBlocks`.
---
 man/make-pandoc-man-pages.hs |   6 +-
 src/Text/Pandoc/Shared.hs    | 191 +++++++++++++++++++++++++++++--------------
 tests/Tests/Shared.hs        |   8 +-
 3 files changed, 139 insertions(+), 66 deletions(-)

(limited to 'src/Text/Pandoc')

diff --git a/man/make-pandoc-man-pages.hs b/man/make-pandoc-man-pages.hs
index 008294433..afba9135a 100644
--- a/man/make-pandoc-man-pages.hs
+++ b/man/make-pandoc-man-pages.hs
@@ -27,7 +27,7 @@ main = do
 
   unless (null ds1 && null ds2) $ do
     rmContents <- UTF8.readFile "README"
-    let (Pandoc meta blocks) = readMarkdown def rmContents
+    let (Pandoc meta blocks) = normalize $ readMarkdown def rmContents
     let manBlocks = removeSect [Str "Wrappers"]
                   $ removeSect [Str "Pandoc's",Space,Str "markdown"] blocks
     let syntaxBlocks = extractSect [Str "Pandoc's",Space,Str "markdown"] blocks
@@ -67,13 +67,13 @@ capitalize (Str xs) = Str $ map toUpper xs
 capitalize x = x
 
 removeSect :: [Inline] -> [Block] -> [Block]
-removeSect ils (Header 1 _ x:xs) | normalize x == normalize ils =
+removeSect ils (Header 1 _ x:xs) | x == ils =
   dropWhile (not . isHeader1) xs
 removeSect ils (x:xs) = x : removeSect ils xs
 removeSect _ [] = []
 
 extractSect :: [Inline] -> [Block] -> [Block]
-extractSect ils (Header 1 _ z:xs) | normalize z == normalize ils =
+extractSect ils (Header 1 _ z:xs) | z == ils =
   bottomUp promoteHeader $ takeWhile (not . isHeader1) xs
     where promoteHeader (Header n attr x) = Header (n-1) attr x
           promoteHeader x            = x
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 5b0d9b6b4..4a536330d 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -55,6 +55,8 @@ module Text.Pandoc.Shared (
                      normalizeSpaces,
                      extractSpaces,
                      normalize,
+                     normalizeInlines,
+                     normalizeBlocks,
                      stringify,
                      compactify,
                      compactify',
@@ -84,7 +86,6 @@ module Text.Pandoc.Shared (
 
 import Text.Pandoc.Definition
 import Text.Pandoc.Walk
-import Text.Pandoc.Generic
 import Text.Pandoc.Builder (Inlines, Blocks, ToMetaValue(..))
 import qualified Text.Pandoc.Builder as B
 import qualified Text.Pandoc.UTF8 as UTF8
@@ -350,72 +351,142 @@ extractSpaces f is =
 -- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
 -- combining adjacent 'Str's and 'Emph's, remove 'Null's and
 -- empty elements, etc.
-normalize :: (Eq a, Data a) => a -> a
-normalize = topDown removeEmptyBlocks .
-            topDown consolidateInlines .
-            bottomUp (removeEmptyInlines . removeTrailingInlineSpaces)
-
-removeEmptyBlocks :: [Block] -> [Block]
-removeEmptyBlocks (Null : xs) = removeEmptyBlocks xs
-removeEmptyBlocks (BulletList [] : xs) = removeEmptyBlocks xs
-removeEmptyBlocks (OrderedList _ [] : xs) = removeEmptyBlocks xs
-removeEmptyBlocks (DefinitionList [] : xs) = removeEmptyBlocks xs
-removeEmptyBlocks (RawBlock _ [] : xs) = removeEmptyBlocks xs
-removeEmptyBlocks (x:xs) = x : removeEmptyBlocks xs
-removeEmptyBlocks [] = []
-
-removeEmptyInlines :: [Inline] -> [Inline]
-removeEmptyInlines (Emph [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Strong [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Subscript [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Superscript [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (SmallCaps [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Strikeout [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (RawInline _ [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Code _ [] : zs) = removeEmptyInlines zs
-removeEmptyInlines (Str "" : zs) = removeEmptyInlines zs
-removeEmptyInlines (x : xs) = x : removeEmptyInlines xs
-removeEmptyInlines [] = []
-
-removeTrailingInlineSpaces :: [Inline] -> [Inline]
-removeTrailingInlineSpaces = reverse . removeLeadingInlineSpaces . reverse
-
-removeLeadingInlineSpaces :: [Inline] -> [Inline]
-removeLeadingInlineSpaces = dropWhile isSpaceOrEmpty
-
-consolidateInlines :: [Inline] -> [Inline]
-consolidateInlines (Str x : ys) =
+normalize :: Pandoc -> Pandoc
+normalize (Pandoc (Meta meta) blocks) =
+  Pandoc (Meta $ M.map go meta) (normalizeBlocks blocks)
+  where go (MetaInlines xs) = MetaInlines $ normalizeInlines xs
+        go (MetaBlocks xs)  = MetaBlocks  $ normalizeBlocks xs
+        go (MetaList ms)    = MetaList $ map go ms
+        go (MetaMap m)      = MetaMap $ M.map go m
+        go x                = x
+
+normalizeBlocks :: [Block] -> [Block]
+normalizeBlocks (Null : xs) = normalizeBlocks xs
+normalizeBlocks (Div attr bs : xs) =
+  Div attr (normalizeBlocks bs) : normalizeBlocks xs
+normalizeBlocks (BlockQuote bs : xs) =
+  case normalizeBlocks bs of
+       []    -> normalizeBlocks xs
+       bs'   -> BlockQuote bs' : normalizeBlocks xs
+normalizeBlocks (BulletList [] : xs) = normalizeBlocks xs
+normalizeBlocks (BulletList items : xs) =
+  BulletList (map normalizeBlocks items) : normalizeBlocks xs
+normalizeBlocks (OrderedList _ [] : xs) = normalizeBlocks xs
+normalizeBlocks (OrderedList attr items : xs) =
+  OrderedList attr (map normalizeBlocks items) : normalizeBlocks xs
+normalizeBlocks (DefinitionList [] : xs) = normalizeBlocks xs
+normalizeBlocks (DefinitionList items : xs) =
+  DefinitionList (map go items) : normalizeBlocks xs
+  where go (ils, bs) = (normalizeInlines ils, map normalizeBlocks bs)
+normalizeBlocks (RawBlock _ "" : xs) = normalizeBlocks xs
+normalizeBlocks (Para ils : xs) =
+  case normalizeInlines ils of
+       []   -> normalizeBlocks xs
+       ils' -> Para ils' : normalizeBlocks xs
+normalizeBlocks (Plain ils : xs) =
+  case normalizeInlines ils of
+       []   -> normalizeBlocks xs
+       ils' -> Plain ils' : normalizeBlocks xs
+normalizeBlocks (Header lev attr ils : xs) =
+  Header lev attr (normalizeInlines ils) : normalizeBlocks xs
+normalizeBlocks (Table capt aligns widths hdrs rows : xs) =
+  Table (normalizeInlines capt) aligns widths
+    (map normalizeBlocks hdrs) (map (map normalizeBlocks) rows)
+  : normalizeBlocks xs
+normalizeBlocks (x:xs) = x : normalizeBlocks xs
+normalizeBlocks [] = []
+
+normalizeInlines :: [Inline] -> [Inline]
+normalizeInlines (Str x : ys) =
   case concat (x : map fromStr strs) of
-        ""     -> consolidateInlines rest
-        n      -> Str n : consolidateInlines rest
+        ""     -> rest
+        n      -> Str n : rest
    where
-     (strs, rest)  = span isStr ys
+     (strs, rest)  = span isStr $ normalizeInlines ys
      isStr (Str _) = True
      isStr _       = False
      fromStr (Str z) = z
-     fromStr _       = error "consolidateInlines - fromStr - not a Str"
-consolidateInlines (Space : ys) = Space : rest
+     fromStr _       = error "normalizeInlines - fromStr - not a Str"
+normalizeInlines (Space : ys) =
+  if null rest
+     then []
+     else Space : rest
    where isSp Space = True
          isSp _     = False
-         rest       = consolidateInlines $ dropWhile isSp ys
-consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
-  Emph (xs ++ ys) : zs
-consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $
-  Strong (xs ++ ys) : zs
-consolidateInlines (Subscript xs : Subscript ys : zs) = consolidateInlines $
-  Subscript (xs ++ ys) : zs
-consolidateInlines (Superscript xs : Superscript ys : zs) = consolidateInlines $
-  Superscript (xs ++ ys) : zs
-consolidateInlines (SmallCaps xs : SmallCaps ys : zs) = consolidateInlines $
-  SmallCaps (xs ++ ys) : zs
-consolidateInlines (Strikeout xs : Strikeout ys : zs) = consolidateInlines $
-  Strikeout (xs ++ ys) : zs
-consolidateInlines (RawInline f x : RawInline f' y : zs) | f == f' =
-  consolidateInlines $ RawInline f (x ++ y) : zs
-consolidateInlines (Code a1 x : Code a2 y : zs) | a1 == a2 =
-  consolidateInlines $ Code a1 (x ++ y) : zs
-consolidateInlines (x : xs) = x : consolidateInlines xs
-consolidateInlines [] = []
+         rest       = dropWhile isSp $ normalizeInlines ys
+normalizeInlines (Emph xs : zs) =
+  case normalizeInlines zs of
+       (Emph ys : rest) -> normalizeInlines $
+         Emph (normalizeInlines $ xs ++ ys) : rest
+       rest -> case normalizeInlines xs of
+                    []  -> rest
+                    xs' -> Emph xs' : rest
+normalizeInlines (Strong xs : zs) =
+  case normalizeInlines zs of
+       (Strong ys : rest) -> normalizeInlines $
+         Strong (normalizeInlines $ xs ++ ys) : rest
+       rest -> case normalizeInlines xs of
+                    []  -> rest
+                    xs' -> Strong xs' : rest
+normalizeInlines (Subscript xs : zs) =
+  case normalizeInlines zs of
+       (Subscript ys : rest) -> normalizeInlines $
+         Subscript (normalizeInlines $ xs ++ ys) : rest
+       rest -> case normalizeInlines xs of
+                    []  -> rest
+                    xs' -> Subscript xs' : rest
+normalizeInlines (Superscript xs : zs) =
+  case normalizeInlines zs of
+       (Superscript ys : rest) -> normalizeInlines $
+         Superscript (normalizeInlines $ xs ++ ys) : rest
+       rest -> case normalizeInlines xs of
+                    []  -> rest
+                    xs' -> Superscript xs' : rest
+normalizeInlines (SmallCaps xs : zs) =
+  case normalizeInlines zs of
+       (SmallCaps ys : rest) -> normalizeInlines $
+         SmallCaps (normalizeInlines $ xs ++ ys) : rest
+       rest -> case normalizeInlines xs of
+                    []  -> rest
+                    xs' -> SmallCaps xs' : rest
+normalizeInlines (Strikeout xs : zs) =
+  case normalizeInlines zs of
+       (Strikeout ys : rest) -> normalizeInlines $
+         Strikeout (normalizeInlines $ xs ++ ys) : rest
+       rest -> case normalizeInlines xs of
+                    []  -> rest
+                    xs' -> Strikeout xs' : rest
+normalizeInlines (RawInline _ [] : ys) = normalizeInlines ys
+normalizeInlines (RawInline f xs : zs) =
+  case normalizeInlines zs of
+       (RawInline f' ys : rest) | f == f' -> normalizeInlines $
+         RawInline f (xs ++ ys) : rest
+       rest -> RawInline f xs : rest
+normalizeInlines (Code _ "" : ys) = normalizeInlines ys
+normalizeInlines (Code attr xs : zs) =
+  case normalizeInlines zs of
+       (Code attr' ys : rest) | attr == attr' -> normalizeInlines $
+         Code attr (xs ++ ys) : rest
+       rest -> Code attr xs : rest
+-- allow empty spans, they may carry identifiers etc.
+-- normalizeInlines (Span _ [] : ys) = normalizeInlines ys
+normalizeInlines (Span attr xs : zs) =
+  case normalizeInlines zs of
+       (Span attr' ys : rest) | attr == attr' -> normalizeInlines $
+         Span attr (normalizeInlines $ xs ++ ys) : rest
+       rest -> Span attr (normalizeInlines xs) : rest
+normalizeInlines (Note bs : ys) = Note (normalizeBlocks bs) :
+  normalizeInlines ys
+normalizeInlines (Quoted qt ils : ys) =
+  Quoted qt (normalizeInlines ils) : normalizeInlines ys
+normalizeInlines (Link ils t : ys) =
+  Link (normalizeInlines ils) t : normalizeInlines ys
+normalizeInlines (Image ils t : ys) =
+  Image (normalizeInlines ils) t : normalizeInlines ys
+normalizeInlines (Cite cs ils : ys) =
+  Cite cs (normalizeInlines ils) : normalizeInlines ys
+normalizeInlines (x : xs) = x : normalizeInlines xs
+normalizeInlines [] = []
 
 -- | Convert pandoc structure to a string with formatting removed.
 -- Footnotes are skipped (since we don't want their contents in link
diff --git a/tests/Tests/Shared.hs b/tests/Tests/Shared.hs
index f4bf13da4..8c7c31674 100644
--- a/tests/Tests/Shared.hs
+++ b/tests/Tests/Shared.hs
@@ -16,11 +16,13 @@ tests = [ testGroup "normalize"
         ]
 
 p_normalize_blocks_rt :: [Block] -> Bool
-p_normalize_blocks_rt bs = normalize bs == normalize (normalize bs)
+p_normalize_blocks_rt bs =
+  normalizeBlocks bs == normalizeBlocks (normalizeBlocks bs)
 
 p_normalize_inlines_rt :: [Inline] -> Bool
-p_normalize_inlines_rt ils = normalize ils == normalize (normalize ils)
+p_normalize_inlines_rt ils =
+  normalizeInlines ils == normalizeInlines (normalizeInlines ils)
 
 p_normalize_no_trailing_spaces :: [Inline] -> Bool
 p_normalize_no_trailing_spaces ils = null ils' || last ils' /= Space
-  where ils' = normalize $ ils ++ [Space]
+  where ils' = normalizeInlines $ ils ++ [Space]
-- 
cgit v1.2.3


From 0abfd386a4697bab78ca098fe439623aad5f4069 Mon Sep 17 00:00:00 2001
From: Jesse Rosenthal <jrosenthal@jhu.edu>
Date: Mon, 30 Jun 2014 11:19:06 -0400
Subject: Docx reader: clean up parStyle processing.

This gets rid of `divAttrToContainers`: an internal convenience function
which had become pretty inconvenient. Rather than converting classes and
indentations to string lists and back, we deal with the `pPr` attribute
directly.
---
 src/Text/Pandoc/Readers/Docx.hs | 81 ++++++++++++++++++-----------------------
 1 file changed, 36 insertions(+), 45 deletions(-)

(limited to 'src/Text/Pandoc')

diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 6e9cf44b5..61c17156e 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -94,7 +94,6 @@ import System.FilePath (combine)
 import qualified Data.Map as M
 import Control.Monad.Reader
 import Control.Monad.State
-import Control.Applicative (liftA2)
 
 readDocx :: ReaderOptions
          -> B.ByteString
@@ -154,56 +153,48 @@ runStyleToContainers rPr =
   in
    classContainers ++ formatters
 
-divAttrToContainers :: [String] -> [(String, String)] -> [Container Block]
-divAttrToContainers (c:cs) _ | Just n <- isHeaderClass c =
-  [Container $ \_ ->
-     Header n ("", delete ("Heading" ++ show n) cs, []) []]
-divAttrToContainers (c:cs) kvs | c `elem` divsToKeep =
-  (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs)
-divAttrToContainers (c:cs) kvs | c `elem` codeDivs =
+parStyleToContainers :: ParagraphStyle -> [Container Block]
+parStyleToContainers pPr | (c:cs) <- pStyle pPr, Just n <- isHeaderClass c =
+  [Container $ \_ -> Header n ("", delete ("Heading" ++ show n) cs, []) []]
+parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` divsToKeep =
+  let pPr' = pPr { pStyle = cs }
+  in
+   (Container $ Div ("", [c], [])) : (parStyleToContainers pPr')
+parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` codeDivs =
   -- This is a bit of a cludge. We make the codeblock from the raw
   -- parparts in bodyPartToBlocks. But we need something to match against.
-  (Container $ \_ -> CodeBlock ("", [], []) "") : (divAttrToContainers cs kvs)
-divAttrToContainers (c:cs) kvs | c `elem` listParagraphDivs =
-  let kvs' = filter (\(k,_) -> k /= "indent") kvs
-  in
-   (Container $ Div ("", [c], [])) : (divAttrToContainers cs kvs')
-divAttrToContainers (c:cs) kvs | c `elem` blockQuoteDivs =
-  (Container BlockQuote) : (divAttrToContainers (cs \\ blockQuoteDivs) kvs)
-divAttrToContainers (_:cs) kvs = divAttrToContainers cs kvs
-divAttrToContainers [] kvs | Just _ <- lookup "indent" kvs
-                           , Just flInd <- lookup "first-line-indent" kvs =
-  let
-    kvs' = filter (\(k,_) -> notElem k ["indent", "first-line-indent"]) kvs
+  let pPr' = pPr { pStyle = cs }
   in
-   case flInd of
-     "0" -> divAttrToContainers [] kvs'
-     ('-':_) -> divAttrToContainers [] kvs'
-     _       -> (Container BlockQuote) : divAttrToContainers [] kvs'
-divAttrToContainers [] kvs | Just ind <- lookup "indent" kvs =
-  let
-    kvs' = filter (\(k,_) -> notElem k ["indent"]) kvs
+   (Container $ \_ -> CodeBlock ("", [], []) "") : (parStyleToContainers pPr')
+parStyleToContainers pPr | (c:cs) <- pStyle pPr,  c `elem` listParagraphDivs =
+  let pPr' = pPr { pStyle = cs, indentation = Nothing}
   in
-   case ind of
-     "0" -> divAttrToContainers [] kvs'
-     ('-':_) -> divAttrToContainers [] kvs'
-     _       -> (Container BlockQuote) : divAttrToContainers [] kvs'
-
-divAttrToContainers _ _ = []
+   (Container $ Div ("", [c], [])) : (parStyleToContainers pPr')
 
-
-parStyleToContainers :: ParagraphStyle -> [Container Block]
-parStyleToContainers pPr =
-  let classes = pStyle pPr
-      indent = indentation pPr >>= leftParIndent
-      hanging = indentation pPr >>= hangingParIndent
-      firstLineIndent = liftA2 (-) indent hanging
-      kvs = mapMaybe id
-            [ indent >>= (\n -> Just ("indent", show n)),
-              firstLineIndent >>= (\n -> Just ("first-line-indent", show n))
-              ]
+parStyleToContainers pPr | (c:cs) <- pStyle pPr, c `elem` blockQuoteDivs =
+  let pPr' = pPr { pStyle = cs \\ blockQuoteDivs }
+  in
+   (Container BlockQuote) : (parStyleToContainers pPr')
+parStyleToContainers pPr | (_:cs) <- pStyle pPr =
+  let pPr' = pPr { pStyle = cs}
+  in
+    parStyleToContainers pPr'                                 
+parStyleToContainers pPr | null (pStyle pPr),
+                          Just left <- indentation pPr >>= leftParIndent,
+                          Just hang <- indentation pPr >>= hangingParIndent =
+  let pPr' = pPr { indentation = Nothing }
+  in
+   case (left - hang) > 0 of
+     True -> (Container BlockQuote) : (parStyleToContainers pPr')
+     False -> parStyleToContainers pPr'
+parStyleToContainers pPr | null (pStyle pPr),
+                          Just left <- indentation pPr >>= leftParIndent =
+  let pPr' = pPr { indentation = Nothing }
   in
-   divAttrToContainers classes kvs
+   case left > 0 of
+     True -> (Container BlockQuote) : (parStyleToContainers pPr')
+     False -> parStyleToContainers pPr'
+parStyleToContainers _ = []
   
 
 strToInlines :: String -> [Inline]
-- 
cgit v1.2.3