From ec88d47f23d6761cf2120f76e45ca23cdc478e6c Mon Sep 17 00:00:00 2001 From: Artyom Kazak Date: Sun, 3 Aug 2014 16:48:55 +0400 Subject: Correctly implement capitalisation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using `map toUpper` to capitalise text is wrong, as e.g. “Straße” should be converted to “STRASSE”, which is 1 character longer. This commit adds a `capitalize` function and replaces 2 identical implementations in different modules (`toCaps` and `capitalize`) with it. --- src/Text/Pandoc/Shared.hs | 13 +++++++++++++ src/Text/Pandoc/Writers/FB2.hs | 11 +++-------- src/Text/Pandoc/Writers/Markdown.hs | 10 +++------- tests/Tests/Writers/Plain.hs | 21 +++++++++++++++++++++ tests/test-pandoc.hs | 2 ++ 5 files changed, 42 insertions(+), 15 deletions(-) create mode 100644 tests/Tests/Writers/Plain.hs diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 9acb959a0..f0e5bbe5d 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -59,6 +59,7 @@ module Text.Pandoc.Shared ( normalizeBlocks, removeFormatting, stringify, + capitalize, compactify, compactify', compactify'DL, @@ -122,6 +123,7 @@ import qualified Data.ByteString.Char8 as B8 import Text.Pandoc.Compat.Monoid import Data.ByteString.Base64 (decodeLenient) import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr) +import qualified Data.Text as T (toUpper, pack, unpack) #ifdef EMBED_DATA_FILES import Text.Pandoc.Data (dataFiles) @@ -527,6 +529,17 @@ stringify = query go . walk deNote deNote (Note _) = Str "" deNote x = x +-- | Bring all regular text in a pandoc structure to uppercase. +-- +-- This function correctly handles cases where a lowercase character doesn't +-- match to a single uppercase character – e.g. “Straße” would be converted +-- to “STRASSE”, not “STRAßE”. +capitalize :: Walkable Inline a => a -> a +capitalize = walk go + where go :: Inline -> Inline + go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s) + go x = x + -- | Change final list item from @Para@ to @Plain@ if the list contains -- no other @Para@ blocks. compactify :: [[Block]] -- ^ List of list items (each a list of blocks) diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 803617f95..7a9bff4fe 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -28,7 +28,7 @@ module Text.Pandoc.Writers.FB2 (writeFB2) where import Control.Monad.State (StateT, evalStateT, get, modify) import Control.Monad.State (liftM, liftM2, liftIO) import Data.ByteString.Base64 (encode) -import Data.Char (toUpper, toLower, isSpace, isAscii, isControl) +import Data.Char (toLower, isSpace, isAscii, isControl) import Data.List (intersperse, intercalate, isPrefixOf) import Data.Either (lefts, rights) import Network.Browser (browse, request, setAllowRedirects, setOutHandler) @@ -44,8 +44,7 @@ import qualified Text.XML.Light.Cursor as XC import Text.Pandoc.Definition import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def) -import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock) -import Text.Pandoc.Walk +import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize) -- | Data to be written at the end of the document: -- (foot)notes, URLs, references, images. @@ -421,10 +420,6 @@ indent = indentBlock indentLines ins = let lns = split isLineBreak ins :: [[Inline]] in intercalate [LineBreak] $ map ((Str spacer):) lns -capitalize :: Inline -> Inline -capitalize (Str xs) = Str $ map toUpper xs -capitalize x = x - -- | Convert a Pandoc's Inline element to FictionBook XML representation. toXml :: Inline -> FBM [Content] toXml (Str s) = return [txt s] @@ -434,7 +429,7 @@ toXml (Strong ss) = list `liftM` wrap "strong" ss toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss toXml (Superscript ss) = list `liftM` wrap "sup" ss toXml (Subscript ss) = list `liftM` wrap "sub" ss -toXml (SmallCaps ss) = cMapM toXml $ walk capitalize ss +toXml (SmallCaps ss) = cMapM toXml $ capitalize ss toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific inner <- cMapM toXml ss return $ [txt "‘"] ++ inner ++ [txt "’"] diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 897e425c6..a859267cc 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -39,7 +39,7 @@ import Text.Pandoc.Writers.Shared import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) import Data.List ( group, isPrefixOf, find, intersperse, transpose, sortBy ) -import Data.Char ( isSpace, isPunctuation, toUpper ) +import Data.Char ( isSpace, isPunctuation ) import Data.Ord ( comparing ) import Text.Pandoc.Pretty import Control.Monad.State @@ -672,10 +672,6 @@ escapeSpaces (Str s) = Str $ substitute " " "\\ " s escapeSpaces Space = Str "\\ " escapeSpaces x = x -toCaps :: Inline -> Inline -toCaps (Str s) = Str (map toUpper s) -toCaps x = x - -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc inlineToMarkdown opts (Span attrs ils) = do @@ -693,7 +689,7 @@ inlineToMarkdown opts (Emph lst) = do inlineToMarkdown opts (Strong lst) = do plain <- gets stPlain if plain - then inlineListToMarkdown opts $ walk toCaps lst + then inlineListToMarkdown opts $ capitalize lst else do contents <- inlineListToMarkdown opts lst return $ "**" <> contents <> "**" @@ -716,7 +712,7 @@ inlineToMarkdown opts (Subscript lst) = do inlineToMarkdown opts (SmallCaps lst) = do plain <- gets stPlain if plain - then inlineListToMarkdown opts $ walk toCaps lst + then inlineListToMarkdown opts $ capitalize lst else do contents <- inlineListToMarkdown opts lst return $ tagWithAttrs "span" diff --git a/tests/Tests/Writers/Plain.hs b/tests/Tests/Writers/Plain.hs new file mode 100644 index 000000000..f8f1d3d90 --- /dev/null +++ b/tests/Tests/Writers/Plain.hs @@ -0,0 +1,21 @@ +{-# LANGUAGE OverloadedStrings #-} +module Tests.Writers.Plain (tests) where + +import Test.Framework +import Text.Pandoc.Builder +import Text.Pandoc +import Tests.Helpers +import Tests.Arbitrary() + + +infix 4 =: +(=:) :: (ToString a, ToPandoc a) + => String -> (a, String) -> Test +(=:) = test (writePlain def . toPandoc) + + +tests :: [Test] +tests = [ "strongly emphasized text to uppercase" + =: strong "Straße" + =?> "STRASSE" + ] diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index 1dab8e6f1..e6924f6b2 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -17,6 +17,7 @@ import qualified Tests.Writers.HTML import qualified Tests.Writers.Docbook import qualified Tests.Writers.Native import qualified Tests.Writers.Markdown +import qualified Tests.Writers.Plain import qualified Tests.Writers.AsciiDoc import qualified Tests.Shared import qualified Tests.Walk @@ -33,6 +34,7 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "HTML" Tests.Writers.HTML.tests , testGroup "Docbook" Tests.Writers.Docbook.tests , testGroup "Markdown" Tests.Writers.Markdown.tests + , testGroup "Plain" Tests.Writers.Plain.tests , testGroup "AsciiDoc" Tests.Writers.AsciiDoc.tests ] , testGroup "Readers" -- cgit v1.2.3