aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2010-12-14 20:04:37 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2010-12-14 20:04:37 -0800
commit63d5e0c5f97ed9b1063187b73e77cd25508a0294 (patch)
tree0b7d21c726269ae49ec81393545912fe11a9ce49
parent53cb199babe37d26e799e71345db507b1c780f4b (diff)
downloadpandoc-63d5e0c5f97ed9b1063187b73e77cd25508a0294.tar.gz
Added 'normalize' to Text.Pandoc.Shared.
-rw-r--r--src/Text/Pandoc/Shared.hs54
1 files changed, 53 insertions, 1 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 1975b7e4c..6134d879c 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -57,6 +57,7 @@ module Text.Pandoc.Shared (
-- * Pandoc block and inline list processing
orderedListMarkers,
normalizeSpaces,
+ normalize,
stringify,
compactify,
Element (..),
@@ -88,7 +89,7 @@ import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString )
import Codec.Binary.UTF8.String ( encodeString, decodeString )
import System.Directory
import System.FilePath ( (</>) )
-import Data.Generics (Typeable, Data)
+import Data.Generics (Typeable, Data, everywhere', mkT)
import qualified Control.Monad.State as S
import Paths_pandoc (getDataFileName)
@@ -339,6 +340,57 @@ normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
cleanup ((Str ""):rest) = cleanup rest
cleanup (x:rest) = x : cleanup rest
+-- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
+-- combining adjacent 'Str's and 'Emph's, remove 'Null's and
+-- empty elements, etc.
+normalize :: Pandoc -> Pandoc
+normalize = everywhere' (mkT normalizeInlines) .
+ everywhere' (mkT normalizeBlocks)
+
+normalizeBlocks :: [Block] -> [Block]
+normalizeBlocks (Null : xs) = normalizeBlocks xs
+normalizeBlocks (RawHtml x : RawHtml y : zs) = normalizeBlocks $
+ RawHtml (x++y) : zs
+normalizeBlocks (x:xs) = x : normalizeBlocks xs
+normalizeBlocks [] = []
+
+normalizeInlines :: [Inline] -> [Inline]
+normalizeInlines (Str x : ys) =
+ case concat (x : map fromStr strs) of
+ "" -> normalizeInlines rest
+ n -> Str n : normalizeInlines rest
+ where
+ (strs, rest) = span isStr ys
+ isStr (Str _) = True
+ isStr _ = False
+ fromStr (Str z) = z
+ fromStr _ = error "normalizeInlines - fromStr - not a Str"
+normalizeInlines (Space : ys) =
+ if null rest
+ then []
+ else Space : rest
+ where isSpace Space = True
+ isSpace _ = False
+ rest = normalizeInlines $ dropWhile isSpace ys
+normalizeInlines (Emph xs : Emph ys : zs) = normalizeInlines $
+ Emph (xs ++ ys) : zs
+normalizeInlines (Strong xs : Strong ys : zs) = normalizeInlines $
+ Strong (xs ++ ys) : zs
+normalizeInlines (Subscript xs : Subscript ys : zs) = normalizeInlines $
+ Subscript (xs ++ ys) : zs
+normalizeInlines (Superscript xs : Superscript ys : zs) = normalizeInlines $
+ Superscript (xs ++ ys) : zs
+normalizeInlines (SmallCaps xs : SmallCaps ys : zs) = normalizeInlines $
+ SmallCaps (xs ++ ys) : zs
+normalizeInlines (Strikeout xs : Strikeout ys : zs) = normalizeInlines $
+ Strikeout (xs ++ ys) : zs
+normalizeInlines (TeX x : TeX y : zs) = normalizeInlines $
+ TeX (x ++ y) : zs
+normalizeInlines (HtmlInline x : HtmlInline y : zs) = normalizeInlines $
+ HtmlInline (x ++ y) : zs
+normalizeInlines (x : xs) = x : normalizeInlines xs
+normalizeInlines [] = []
+
-- | Convert list of inlines to a string with formatting removed.
stringify :: [Inline] -> String
stringify = queryWith go