aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJohn MacFarlane <fiddlosopher@gmail.com>2012-01-01 13:48:28 -0800
committerJohn MacFarlane <fiddlosopher@gmail.com>2012-01-01 13:48:28 -0800
commitda8425598a8ab4a98388e8ee346a2ae7ec540aa0 (patch)
tree20078094309f4fc57ea67e5b2d17163f86078ec1 /src/Text
parent3cf60c73061f247b531da4b3c18664c6134bee53 (diff)
downloadpandoc-da8425598a8ab4a98388e8ee346a2ae7ec540aa0.tar.gz
New treatment of dashes in --smart mode.
* `---` is always em-dash, `--` is always en-dash. * pandoc no longer tries to guess when `-` should be en-dash. * A new option, `--old-dashes`, is provided for legacy documents. Rationale: The rules for en-dash are too complex and language-dependent for a guesser to work reliably. This change gives users greater control. The alternative of using unicode isn't very good, since unicode em- and en- dashes are barely distinguishable in a monospace font.
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Parsing.hs34
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs3
2 files changed, 31 insertions, 6 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index c2c512033..71da3a730 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -614,6 +614,9 @@ data ParserState = ParserState
stateDate :: [Inline], -- ^ Date of document
stateStrict :: Bool, -- ^ Use strict markdown syntax?
stateSmart :: Bool, -- ^ Use smart typography?
+ stateOldDashes :: Bool, -- ^ Use pandoc <= 1.8.2.1 behavior
+ -- in parsing dashes: `--` is em-dash,
+ -- `-` before a numeral is en-dash
stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell
stateColumns :: Int, -- ^ Number of columns in terminal
stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
@@ -642,6 +645,7 @@ defaultParserState =
stateDate = [],
stateStrict = False,
stateSmart = False,
+ stateOldDashes = False,
stateLiterateHaskell = False,
stateColumns = 80,
stateHeaderTable = [],
@@ -788,17 +792,37 @@ ellipses = do
try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
return (Str "\8230")
-dash :: GenParser Char st Inline
-dash = enDash <|> emDash
+dash :: GenParser Char ParserState Inline
+dash = do
+ oldDashes <- stateOldDashes `fmap` getState
+ if oldDashes
+ then emDashOld <|> enDashOld
+ else Str `fmap` (hyphenDash <|> emDash <|> enDash)
-enDash :: GenParser Char st Inline
+-- Two hyphens = en-dash, three = em-dash
+hyphenDash :: GenParser Char st String
+hyphenDash = do
+ try $ string "--"
+ option "\8211" (char '-' >> return "\8212")
+
+emDash :: GenParser Char st String
+emDash = do
+ try (charOrRef "\8212\151")
+ return "\8212"
+
+-- En-dash character (code 8211 or 150, the same set enDashOld accepts)
+enDash :: GenParser Char st String
enDash = do
+ try (charOrRef "\8211\150")
+ return "\8211"
+
+enDashOld :: GenParser Char st Inline
+enDashOld = do
try (charOrRef "\8211\150") <|>
try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
return (Str "\8211")
-emDash :: GenParser Char st Inline
-emDash = do
+emDashOld :: GenParser Char st Inline
+emDashOld = do
try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
return (Str "\8212")
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 4693bd06d..3b5954368 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -68,7 +68,8 @@ import Control.Monad ( guard, liftM )
readTextile :: ParserState -- ^ Parser state, including options for parser
-> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Pandoc
-readTextile state s = (readWith parseTextile) state (s ++ "\n\n")
+readTextile state s =
+ (readWith parseTextile) state{ stateOldDashes = True } (s ++ "\n\n")
--