aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArtyom Kazak <yom@artyom.me>2014-08-03 16:48:55 +0400
committerArtyom Kazak <yom@artyom.me>2014-08-03 17:37:37 +0400
commitec88d47f23d6761cf2120f76e45ca23cdc478e6c (patch)
tree97506dc41a66d0050b94d4a1c086c9e28d2a7927
parent842c705097fbb3987145eae85da2261cb264e618 (diff)
downloadpandoc-ec88d47f23d6761cf2120f76e45ca23cdc478e6c.tar.gz
Correctly implement capitalisation.
Using `map toUpper` to capitalise text is wrong, as e.g. “Straße” should be converted to “STRASSE”, which is 1 character longer. This commit adds a `capitalize` function and replaces 2 identical implementations in different modules (`toCaps` and `capitalize`) with it.
-rw-r--r--src/Text/Pandoc/Shared.hs13
-rw-r--r--src/Text/Pandoc/Writers/FB2.hs11
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs10
-rw-r--r--tests/Tests/Writers/Plain.hs21
-rw-r--r--tests/test-pandoc.hs2
5 files changed, 42 insertions, 15 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 9acb959a0..f0e5bbe5d 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -59,6 +59,7 @@ module Text.Pandoc.Shared (
normalizeBlocks,
removeFormatting,
stringify,
+ capitalize,
compactify,
compactify',
compactify'DL,
@@ -122,6 +123,7 @@ import qualified Data.ByteString.Char8 as B8
import Text.Pandoc.Compat.Monoid
import Data.ByteString.Base64 (decodeLenient)
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
+import qualified Data.Text as T (toUpper, pack, unpack)
#ifdef EMBED_DATA_FILES
import Text.Pandoc.Data (dataFiles)
@@ -527,6 +529,17 @@ stringify = query go . walk deNote
deNote (Note _) = Str ""
deNote x = x
+-- | Bring all regular text in a pandoc structure to uppercase.
+--
+-- This function correctly handles cases where a lowercase character doesn't
+-- match to a single uppercase character – e.g. “Straße” would be converted
+-- to “STRASSE”, not “STRAßE”.
+capitalize :: Walkable Inline a => a -> a
+capitalize = walk go
+ where go :: Inline -> Inline
+ go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
+ go x = x
+
-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.
compactify :: [[Block]] -- ^ List of list items (each a list of blocks)
diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs
index 803617f95..7a9bff4fe 100644
--- a/src/Text/Pandoc/Writers/FB2.hs
+++ b/src/Text/Pandoc/Writers/FB2.hs
@@ -28,7 +28,7 @@ module Text.Pandoc.Writers.FB2 (writeFB2) where
import Control.Monad.State (StateT, evalStateT, get, modify)
import Control.Monad.State (liftM, liftM2, liftIO)
import Data.ByteString.Base64 (encode)
-import Data.Char (toUpper, toLower, isSpace, isAscii, isControl)
+import Data.Char (toLower, isSpace, isAscii, isControl)
import Data.List (intersperse, intercalate, isPrefixOf)
import Data.Either (lefts, rights)
import Network.Browser (browse, request, setAllowRedirects, setOutHandler)
@@ -44,8 +44,7 @@ import qualified Text.XML.Light.Cursor as XC
import Text.Pandoc.Definition
import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def)
-import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock)
-import Text.Pandoc.Walk
+import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize)
-- | Data to be written at the end of the document:
-- (foot)notes, URLs, references, images.
@@ -421,10 +420,6 @@ indent = indentBlock
indentLines ins = let lns = split isLineBreak ins :: [[Inline]]
in intercalate [LineBreak] $ map ((Str spacer):) lns
-capitalize :: Inline -> Inline
-capitalize (Str xs) = Str $ map toUpper xs
-capitalize x = x
-
-- | Convert a Pandoc's Inline element to FictionBook XML representation.
toXml :: Inline -> FBM [Content]
toXml (Str s) = return [txt s]
@@ -434,7 +429,7 @@ toXml (Strong ss) = list `liftM` wrap "strong" ss
toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss
toXml (Superscript ss) = list `liftM` wrap "sup" ss
toXml (Subscript ss) = list `liftM` wrap "sub" ss
-toXml (SmallCaps ss) = cMapM toXml $ walk capitalize ss
+toXml (SmallCaps ss) = cMapM toXml $ capitalize ss
toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific
inner <- cMapM toXml ss
return $ [txt "‘"] ++ inner ++ [txt "’"]
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 897e425c6..a859267cc 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -39,7 +39,7 @@ import Text.Pandoc.Writers.Shared
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space)
import Data.List ( group, isPrefixOf, find, intersperse, transpose, sortBy )
-import Data.Char ( isSpace, isPunctuation, toUpper )
+import Data.Char ( isSpace, isPunctuation )
import Data.Ord ( comparing )
import Text.Pandoc.Pretty
import Control.Monad.State
@@ -672,10 +672,6 @@ escapeSpaces (Str s) = Str $ substitute " " "\\ " s
escapeSpaces Space = Str "\\ "
escapeSpaces x = x
-toCaps :: Inline -> Inline
-toCaps (Str s) = Str (map toUpper s)
-toCaps x = x
-
-- | Convert Pandoc inline element to markdown.
inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
inlineToMarkdown opts (Span attrs ils) = do
@@ -693,7 +689,7 @@ inlineToMarkdown opts (Emph lst) = do
inlineToMarkdown opts (Strong lst) = do
plain <- gets stPlain
if plain
- then inlineListToMarkdown opts $ walk toCaps lst
+ then inlineListToMarkdown opts $ capitalize lst
else do
contents <- inlineListToMarkdown opts lst
return $ "**" <> contents <> "**"
@@ -716,7 +712,7 @@ inlineToMarkdown opts (Subscript lst) = do
inlineToMarkdown opts (SmallCaps lst) = do
plain <- gets stPlain
if plain
- then inlineListToMarkdown opts $ walk toCaps lst
+ then inlineListToMarkdown opts $ capitalize lst
else do
contents <- inlineListToMarkdown opts lst
return $ tagWithAttrs "span"
diff --git a/tests/Tests/Writers/Plain.hs b/tests/Tests/Writers/Plain.hs
new file mode 100644
index 000000000..f8f1d3d90
--- /dev/null
+++ b/tests/Tests/Writers/Plain.hs
@@ -0,0 +1,21 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Tests.Writers.Plain (tests) where
+
+import Test.Framework
+import Text.Pandoc.Builder
+import Text.Pandoc
+import Tests.Helpers
+import Tests.Arbitrary()
+
+
+infix 4 =:
+(=:) :: (ToString a, ToPandoc a)
+ => String -> (a, String) -> Test
+(=:) = test (writePlain def . toPandoc)
+
+
+tests :: [Test]
+tests = [ "strongly emphasized text to uppercase"
+ =: strong "Straße"
+ =?> "STRASSE"
+ ]
diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs
index 1dab8e6f1..e6924f6b2 100644
--- a/tests/test-pandoc.hs
+++ b/tests/test-pandoc.hs
@@ -17,6 +17,7 @@ import qualified Tests.Writers.HTML
import qualified Tests.Writers.Docbook
import qualified Tests.Writers.Native
import qualified Tests.Writers.Markdown
+import qualified Tests.Writers.Plain
import qualified Tests.Writers.AsciiDoc
import qualified Tests.Shared
import qualified Tests.Walk
@@ -33,6 +34,7 @@ tests = [ testGroup "Old" Tests.Old.tests
, testGroup "HTML" Tests.Writers.HTML.tests
, testGroup "Docbook" Tests.Writers.Docbook.tests
, testGroup "Markdown" Tests.Writers.Markdown.tests
+ , testGroup "Plain" Tests.Writers.Plain.tests
, testGroup "AsciiDoc" Tests.Writers.AsciiDoc.tests
]
, testGroup "Readers"