From 7807564d4493af1462d61138c63d8ec365abc792 Mon Sep 17 00:00:00 2001 From: mpickering Date: Mon, 16 Jun 2014 20:45:54 +0100 Subject: Moved extractSpaces to Shared.hs Generalised and moved the extractSpaces function from `HTML.hs` to `Shared.hs` so that the docx reader can also use it. --- src/Text/Pandoc/Readers/HTML.hs | 17 ++++------------- src/Text/Pandoc/Shared.hs | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 905e55b22..9cdc5a567 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -50,7 +50,6 @@ import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero ) import Control.Applicative ( (<$>), (<$), (<*) ) import Data.Monoid -import Data.Sequence (ViewL(..), ViewR(..), viewr, viewl) isSpace :: Char -> Bool isSpace ' ' = True @@ -369,9 +368,9 @@ pQ = do then InSingleQuote else InDoubleQuote let constructor = case quoteType of - SingleQuote -> B.singleQuoted + SingleQuote -> B.singleQuoted DoubleQuote -> B.doubleQuoted - withQuoteContext innerQuoteContext $ + withQuoteContext innerQuoteContext $ pInlinesInTags "q" constructor pEmph :: TagParser Inlines @@ -406,7 +405,7 @@ pLink = try $ do let url = fromAttrib "href" tag let title = fromAttrib "title" tag lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") - return $ B.link (escapeURI url) title lab + return $ B.link (escapeURI url) title lab pImage :: TagParser Inlines pImage = do @@ -439,15 +438,7 @@ pRawHtmlInline = do pInlinesInTags :: String -> (Inlines -> Inlines) -> TagParser Inlines -pInlinesInTags tagtype f = do - contents <- B.unMany <$> pInTags tagtype inline - let left = case viewl contents of - (Space :< _) -> B.space - _ -> mempty - let right = case viewr contents of - (_ :> Space) -> B.space - _ -> mempty - return (left <> f (trimInlines . 
B.Many $ contents) <> right) +pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline pInTags :: (Monoid a) => String -> TagParser a -> TagParser a diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index b0adf55f5..5b0d9b6b4 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -53,6 +53,7 @@ module Text.Pandoc.Shared ( -- * Pandoc block and inline list processing orderedListMarkers, normalizeSpaces, + extractSpaces, normalize, stringify, compactify, @@ -113,6 +114,7 @@ import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as B8 import Text.Pandoc.Compat.Monoid import Data.ByteString.Base64 (decodeLenient) +import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) #ifdef EMBED_DATA_FILES import Text.Pandoc.Data (dataFiles) @@ -331,6 +333,20 @@ isSpaceOrEmpty Space = True isSpaceOrEmpty (Str "") = True isSpaceOrEmpty _ = False +-- | Extract the leading and trailing spaces from inside an inline element +-- and place them outside the element. + +extractSpaces :: (Inlines -> Inlines) -> Inlines -> Inlines +extractSpaces f is = + let contents = B.unMany is + left = case viewl contents of + (Space :< _) -> B.space + _ -> mempty + right = case viewr contents of + (_ :> Space) -> B.space + _ -> mempty in + (left <> f (B.trimInlines . B.Many $ contents) <> right) + -- | Normalize @Pandoc@ document, consolidating doubled 'Space's, -- combining adjacent 'Str's and 'Emph's, remove 'Null's and -- empty elements, etc. -- cgit v1.2.3