diff options
author | John MacFarlane <fiddlosopher@gmail.com> | 2012-01-01 13:48:28 -0800 |
---|---|---|
committer | John MacFarlane <fiddlosopher@gmail.com> | 2012-01-01 13:48:28 -0800 |
commit | da8425598a8ab4a98388e8ee346a2ae7ec540aa0 (patch) | |
tree | 20078094309f4fc57ea67e5b2d17163f86078ec1 /src/Text | |
parent | 3cf60c73061f247b531da4b3c18664c6134bee53 (diff) | |
download | pandoc-da8425598a8ab4a98388e8ee346a2ae7ec540aa0.tar.gz |
New treatment of dashes in --smart mode.
* `---` is always em-dash, `--` is always en-dash.
* pandoc no longer tries to guess when `-` should be en-dash.
* A new option, `--old-dashes`, is provided for legacy documents.
Rationale: The rules for en-dash are too complex and
language-dependent for a guesser to work reliably. This
change gives users greater control. The alternative of
using Unicode isn't very good, since Unicode em-dashes and
en-dashes are barely distinguishable in a monospace font.
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 34 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Textile.hs | 3 |
2 files changed, 31 insertions, 6 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index c2c512033..71da3a730 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -614,6 +614,9 @@ data ParserState = ParserState
       stateDate :: [Inline], -- ^ Date of document
       stateStrict :: Bool, -- ^ Use strict markdown syntax?
       stateSmart :: Bool, -- ^ Use smart typography?
+      stateOldDashes :: Bool, -- ^ Use pandoc <= 1.8.2.1 behavior
+                              -- in parsing dashes; -- is em-dash;
+                              -- before numeral is en-dash
       stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell
       stateColumns :: Int, -- ^ Number of columns in terminal
       stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
@@ -642,6 +645,7 @@ defaultParserState =
       stateDate = [],
       stateStrict = False,
       stateSmart = False,
+      stateOldDashes = False,
       stateLiterateHaskell = False,
       stateColumns = 80,
       stateHeaderTable = [],
@@ -788,17 +792,37 @@ ellipses = do
   try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
   return (Str "\8230")

-dash :: GenParser Char st Inline
-dash = enDash <|> emDash
+dash :: GenParser Char ParserState Inline
+dash = do
+  oldDashes <- stateOldDashes `fmap` getState
+  if oldDashes
+     then emDashOld <|> enDashOld
+     else Str `fmap` (hyphenDash <|> emDash <|> enDash)

-enDash :: GenParser Char st Inline
+-- Two hyphens = en-dash, three = em-dash
+hyphenDash :: GenParser Char st String
+hyphenDash = do
+  try $ string "--"
+  option "\8211" (char '-' >> return "\8212")
+
+emDash :: GenParser Char st String
+emDash = do
+  try (charOrRef "\8212\151")
+  return "\8212"
+
+enDash :: GenParser Char st String
 enDash = do
+  try (charOrRef "\8212\151")
+  return "\8211"
+
+enDashOld :: GenParser Char st Inline
+enDashOld = do
   try (charOrRef "\8211\150") <|>
     try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
   return (Str "\8211")

-emDash :: GenParser Char st Inline
-emDash = do
+emDashOld :: GenParser Char st Inline
+emDashOld = do
   try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
   return (Str "\8212")

diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 4693bd06d..3b5954368 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -68,7 +68,8 @@ import Control.Monad ( guard, liftM )
 readTextile :: ParserState -- ^ Parser state, including options for parser
             -> String -- ^ String to parse (assuming @'\n'@ line endings)
             -> Pandoc
-readTextile state s = (readWith parseTextile) state (s ++ "\n\n")
+readTextile state s =
+  (readWith parseTextile) state{ stateOldDashes = True } (s ++ "\n\n")


 -- |