From 925a4c5164026bfda25bf50b552bacec074fdf3f Mon Sep 17 00:00:00 2001
From: John MacFarlane <fiddlosopher@gmail.com>
Date: Thu, 29 Dec 2011 23:44:12 -0800
Subject: Better smart quote parsing.

* Added stateLastStrPos to ParserState. It records the source position
  immediately after the most recently parsed 'str', so we can tell
  whether the parser is sitting directly after a str. If we encounter
  a ' character at that position, it must be an apostrophe, and can't
  be a single quote start.

* Set this in the markdown, textile, html, and rst str parsers.

* Closes #360.
---
 src/Text/Pandoc/Parsing.hs          | 8 +++++++-
 src/Text/Pandoc/Readers/HTML.hs     | 8 ++++++--
 src/Text/Pandoc/Readers/Markdown.hs | 2 ++
 src/Text/Pandoc/Readers/RST.hs      | 6 +++++-
 src/Text/Pandoc/Readers/Textile.hs  | 2 ++
 5 files changed, 22 insertions(+), 4 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 5fa375ca6..c2c512033 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -603,6 +603,7 @@ data ParserState = ParserState
     { stateParseRaw        :: Bool,          -- ^ Parse raw HTML and LaTeX?
       stateParserContext   :: ParserContext, -- ^ Inside list?
       stateQuoteContext    :: QuoteContext,  -- ^ Inside quoted environment?
+      stateLastStrPos      :: Maybe SourcePos, -- ^ Position after last str parsed
       stateKeys            :: KeyTable,      -- ^ List of reference keys
       stateCitations       :: [String],      -- ^ List of available citations
       stateNotes           :: NoteTable,     -- ^ List of notes
@@ -630,6 +631,7 @@ defaultParserState =
     ParserState { stateParseRaw        = False,
                   stateParserContext   = NullState,
                   stateQuoteContext    = NoQuote,
+                  stateLastStrPos      = Nothing,
                   stateKeys            = M.empty,
                   stateCitations       = [],
                   stateNotes           = [],
@@ -751,8 +753,12 @@ charOrRef cs =
                        return c)
 
 singleQuoteStart :: GenParser Char ParserState ()
-singleQuoteStart = do 
+singleQuoteStart = do
   failIfInQuoteContext InSingleQuote
+  pos <- getPosition
+  st <- getState
+  -- single quote start can't be right after str
+  guard $ stateLastStrPos st /= Just pos
   try $ do charOrRef "'\8216\145"
            notFollowedBy (oneOf ")!],;:-? \t\n")
            notFollowedBy (char '.') <|> lookAhead (string "..." >> return ())
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 70ea24680..179065413 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -421,8 +421,12 @@ pTagContents =
   pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad
 
 pStr :: GenParser Char ParserState Inline
-pStr = liftM Str $ many1 $ satisfy $ \c ->
-           not (isSpace c) && not (isSpecial c) && not (isBad c)
+pStr = do
+  result <- many1 $ satisfy $ \c ->
+                     not (isSpace c) && not (isSpecial c) && not (isBad c)
+  pos <- getPosition
+  updateState $ \s -> s{ stateLastStrPos = Just pos }
+  return $ Str result
 
 isSpecial :: Char -> Bool
 isSpecial '"' = True
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index db68df629..d854bd3c7 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1096,6 +1096,8 @@ str = do
                          lookAhead alphaNum >> return '\x2019')
                          -- for things like l'aide
                    else mzero
+  pos <- getPosition
+  updateState $ \s -> s{ stateLastStrPos = Just pos }
   let result = a:as
   let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
   if smart
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 3dcfe47d0..d8704d8c9 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -791,7 +791,11 @@ whitespace :: GenParser Char ParserState Inline
 whitespace = many1 spaceChar >> return Space <?> "whitespace"
 
 str :: GenParser Char ParserState Inline
-str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
+str = do
+  result <- many1 (noneOf (specialChars ++ "\t\n "))
+  pos <- getPosition
+  updateState $ \s -> s{ stateLastStrPos = Just pos }
+  return $ Str result
 
 -- an endline character that can be treated as a space, not a structural break
 endline :: GenParser Char ParserState Inline
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 12d299aa4..4693bd06d 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -436,6 +436,8 @@ str = do
               next <- lookAhead letter
               guard $ isLetter (last xs) || isLetter next
               return $ xs ++ "-"
+  pos <- getPosition
+  updateState $ \s -> s{ stateLastStrPos = Just pos }
   return $ Str result
 
 -- | Textile allows HTML span infos, we discard them
-- 
cgit v1.2.3