aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCraig S. Bosma <craig.bosma@gmail.com>2015-03-09 07:11:53 -0500
committerCraig S. Bosma <craig.bosma@gmail.com>2015-03-09 07:11:53 -0500
commit513221f822e7086b9841e59be23feb40b82f4977 (patch)
treea15361d2972981261e2fefb18f025ad15b63c77c
parentc7c45918dc5cc34fe47186c62027802bcd6b3b9a (diff)
downloadpandoc-513221f822e7086b9841e59be23feb40b82f4977.tar.gz
Org reader: add support for smart punctuation
-rw-r--r--src/Text/Pandoc/Readers/Org.hs58
-rw-r--r--tests/Tests/Readers/Org.hs24
2 files changed, 74 insertions, 8 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index f16aed48d..4a523657c 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,6 +1,6 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-}
{-
Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de>
@@ -49,7 +49,7 @@ import Control.Applicative ( Applicative, pure
, (<$>), (<$), (<*>), (<*), (*>) )
import Control.Arrow (first)
import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
-import Control.Monad.Reader (Reader, runReader, ask, asks)
+import Control.Monad.Reader (Reader, runReader, ask, asks, local)
import Data.Char (isAlphaNum, toLower)
import Data.Default
import Data.List (intersperse, isPrefixOf, isSuffixOf)
@@ -62,9 +62,11 @@ import Network.HTTP (urlEncode)
readOrg :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Pandoc
-readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
+readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
-type OrgParser = Parser [Char] OrgParserState
+data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
+
+type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
parseOrg :: OrgParser Pandoc
parseOrg = do
@@ -125,6 +127,9 @@ data OrgParserState = OrgParserState
, orgStateNotes' :: OrgNoteTable
}
+instance Default OrgParserLocal where
+ def = OrgParserLocal NoQuote
+
instance HasReaderOptions OrgParserState where
extractReaderOptions = orgStateOptions
@@ -138,6 +143,10 @@ instance HasLastStrPosition OrgParserState where
getLastStrPos = orgStateLastStrPos
setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
+instance HasQuoteContext st (Reader OrgParserLocal) where
+ getQuoteContext = asks orgLocalQuoteContext
+ withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q})
+
instance Default OrgParserState where
def = defaultOrgParserState
@@ -964,6 +973,7 @@ inline =
, subscript
, superscript
, inlineLaTeX
+ , smart
, symbol
] <* (guard =<< newlinesCountWithinLimits)
<?> "inline"
@@ -1270,12 +1280,16 @@ displayMath :: OrgParser (F Inlines)
displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
, rawMathBetween "$$" "$$"
]
+
+updatePositions :: Char
+ -> OrgParser (Char)
+updatePositions c = do
+ when (c `elem` emphasisPreChars) updateLastPreCharPos
+ when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
+ return c
+
symbol :: OrgParser (F Inlines)
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
- where updatePositions c = do
- when (c `elem` emphasisPreChars) updateLastPreCharPos
- when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
- return c
emphasisBetween :: Char
-> OrgParser (F Inlines)
@@ -1486,3 +1500,31 @@ inlineLaTeXCommand = try $ do
count len anyChar
return cs
_ -> mzero
+
+smart :: OrgParser (F Inlines)
+smart = do
+ getOption readerSmart >>= guard
+ doubleQuoted <|> singleQuoted <|>
+ choice (map (return <$>) [orgApostrophe, dash, ellipses])
+ where orgApostrophe =
+ (char '\'' <|> char '\8217') <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+ *> return (B.str "\x2019")
+
+singleQuoted :: OrgParser (F Inlines)
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $
+ fmap B.singleQuoted . trimInlinesF . mconcat <$>
+ many1Till inline singleQuoteEnd
+
+-- doubleQuoted will handle regular double-quoted sections, as well
+-- as dialogues with an open double-quote without a close double-quote
+-- in the same paragraph.
+doubleQuoted :: OrgParser (F Inlines)
+doubleQuoted = try $ do
+ doubleQuoteStart
+ contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
+ (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
+ (fmap B.doubleQuoted . trimInlinesF $ contents))
+ <|> (return $ return (B.str "\8220") <> contents)
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 39c40cd45..c373d52cc 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -12,6 +12,9 @@ import Data.Monoid (mempty, mappend, mconcat)
org :: String -> Pandoc
org = readOrg def
+orgSmart :: String -> Pandoc
+orgSmart = readOrg def { readerSmart = True }
+
infix 4 =:
(=:) :: ToString c
=> String -> (String, c) -> Test
@@ -1152,4 +1155,25 @@ tests =
]
in codeBlockWith ( "", classes, params) "code body\n"
]
+ , testGroup "Smart punctuation"
+ [ test orgSmart "quote before ellipses"
+ ("'...hi'"
+ =?> para (singleQuoted "…hi"))
+
+ , test orgSmart "apostrophe before emph"
+ ("D'oh! A l'/aide/!"
+ =?> para ("D’oh! A l’" <> emph "aide" <> "!"))
+
+ , test orgSmart "apostrophe in French"
+ ("À l'arrivée de la guerre, le thème de l'«impossibilité du socialisme»"
+ =?> para "À l’arrivée de la guerre, le thème de l’«impossibilité du socialisme»")
+
+ , test orgSmart "Quotes cannot occur at the end of emphasized text"
+ ("/say \"yes\"/" =?>
+ para ("/say" <> space <> doubleQuoted "yes" <> "/"))
+
+ , test orgSmart "Dashes are allowed at the borders of emphasis'"
+ ("/foo---/" =?>
+ para (emph "foo—"))
+ ]
]