Parsing: Simplified dash and ellipsis.

This originated with @dubiousjim's observation in #1419 that there was a typo in the definition of enDash. It returned an em dash character instead of an en dash. I thought about why this had not been noticed before, and realized that en dashes were just being parsed as regular symbols. That made me realize that, now that we no longer have dedicated EnDash, EmDash, and Ellipses inline elements, as we used to in pandoc, we no longer need to parse the Unicode characters specially. This allowed a considerable simplification of the code. Partially resolves #1419.
author: John MacFarlane <jgm@berkeley.edu> 2014-07-12 22:59:35 -0700
committer: John MacFarlane <jgm@berkeley.edu> 2014-07-12 23:44:56 -0700
commit: 47a5f04761d9921ee273e8264e26409c806a912d (patch)
tree: 6197a1ce338bd8495a2b6d033093242d7cf54fb6 /src/Text/Pandoc
parent: 4676bfdf825a2b5b205d6057462d317c00c6b354 (diff)
download: pandoc-47a5f04761d9921ee273e8264e26409c806a912d.tar.gz
1 files changed, 13 insertions, 40 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index d775b3f36..f77ce60d8 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -169,7 +169,7 @@ import Text.Pandoc.XML (fromEntities)
 import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
 import Text.Parsec
 import Text.Parsec.Pos (newPos)
-import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum, isDigit,
+import Data.Char ( toLower, toUpper, ord, chr, isAscii, isAlphaNum,
                    isHexDigit, isSpace )
 import Data.List ( intercalate, transpose )
 import Text.Pandoc.Shared
@@ -1124,48 +1124,21 @@ doubleQuoteEnd = void (charOrRef "\"\8221\148")
 
 ellipses :: Stream s m Char
          => ParserT s st m Inlines
-ellipses = do
-  try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
-  return (B.str "\8230")
+ellipses = try (string "..." >> return (B.str "\8230"))
 
-dash :: Stream s m Char => ParserT s ParserState m Inlines
-dash = do
+dash :: (HasReaderOptions st, Stream s m Char)
+     => ParserT s st m Inlines
+dash = try $ do
   oldDashes <- getOption readerOldDashes
   if oldDashes
-     then emDashOld <|> enDashOld
-     else B.str <$> (hyphenDash <|> emDash <|> enDash)
-
--- Two hyphens = en-dash, three = em-dash
-hyphenDash :: Stream s m Char
-           => ParserT s st m String
-hyphenDash = do
-  try $ string "--"
-  option "\8211" (char '-' >> return "\8212")
-
-emDash :: Stream s m Char
-       => ParserT s st m String
-emDash = do
-  try (charOrRef "\8212\151")
-  return "\8212"
-
-enDash :: Stream s m Char
-       => ParserT s st m String
-enDash = do
-  try (charOrRef "\8212\151")
-  return "\8211"
-
-enDashOld :: Stream s m Char
-          => ParserT s st m Inlines
-enDashOld = do
-  try (charOrRef "\8211\150") <|>
-    try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
-  return (B.str "\8211")
-
-emDashOld :: Stream s m Char
-          => ParserT s st m Inlines
-emDashOld = do
-  try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
-  return (B.str "\8212")
+     then do
+       char '-'
+       (char '-' >> return (B.str "\8212"))
+         <|> (lookAhead digit >> return (B.str "\8211"))
+     else do
+       string "--"
+       (char '-' >> return (B.str "\8212"))
+         <|> return (B.str "\8211")
 
 -- This is used to prevent exponential blowups for things like:
 -- a**a*a**a*a**a*a**a*a**a*a**a*a**
author	John MacFarlane <jgm@berkeley.edu>	2014-07-12 22:59:35 -0700
committer	John MacFarlane <jgm@berkeley.edu>	2014-07-12 23:44:56 -0700
commit	47a5f04761d9921ee273e8264e26409c806a912d (patch)
tree	6197a1ce338bd8495a2b6d033093242d7cf54fb6 /src/Text/Pandoc
parent	4676bfdf825a2b5b205d6057462d317c00c6b354 (diff)
download	pandoc-47a5f04761d9921ee273e8264e26409c806a912d.tar.gz