diff options
author | Craig S. Bosma <craig.bosma@gmail.com> | 2015-03-09 07:11:53 -0500 |
---|---|---|
committer | Craig S. Bosma <craig.bosma@gmail.com> | 2015-03-09 07:11:53 -0500 |
commit | 513221f822e7086b9841e59be23feb40b82f4977 (patch) | |
tree | a15361d2972981261e2fefb18f025ad15b63c77c | |
parent | c7c45918dc5cc34fe47186c62027802bcd6b3b9a (diff) | |
download | pandoc-513221f822e7086b9841e59be23feb40b82f4977.tar.gz |
Org reader: add support for smart punctuation
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 58 | ||||
-rw-r--r-- | tests/Tests/Readers/Org.hs | 24 |
2 files changed, 74 insertions, 8 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index f16aed48d..4a523657c 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,6 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-} {- Copyright (C) 2014 Albert Krewinkel <tarleb@moltkeplatz.de> @@ -49,7 +49,7 @@ import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>) ) import Control.Arrow (first) import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when) -import Control.Monad.Reader (Reader, runReader, ask, asks) +import Control.Monad.Reader (Reader, runReader, ask, asks, local) import Data.Char (isAlphaNum, toLower) import Data.Default import Data.List (intersperse, isPrefixOf, isSuffixOf) @@ -62,9 +62,11 @@ import Network.HTTP (urlEncode) readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") +readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") -type OrgParser = Parser [Char] OrgParserState +data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } + +type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) parseOrg :: OrgParser Pandoc parseOrg = do @@ -125,6 +127,9 @@ data OrgParserState = OrgParserState , orgStateNotes' :: OrgNoteTable } +instance Default OrgParserLocal where + def = OrgParserLocal NoQuote + instance HasReaderOptions OrgParserState where extractReaderOptions = orgStateOptions @@ -138,6 +143,10 @@ instance HasLastStrPosition OrgParserState where getLastStrPos = orgStateLastStrPos setLastStrPos pos st = st{ orgStateLastStrPos = Just pos } +instance HasQuoteContext st (Reader OrgParserLocal) where + getQuoteContext = asks orgLocalQuoteContext + withQuoteContext q = local (\s -> s{orgLocalQuoteContext = q}) + instance Default OrgParserState where def = defaultOrgParserState @@ -964,6 +973,7 @@ inline = , subscript , superscript , inlineLaTeX + , smart , symbol ] <* (guard =<< newlinesCountWithinLimits) <?> "inline" @@ -1270,12 +1280,16 @@ displayMath :: OrgParser (F Inlines) displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" , rawMathBetween "$$" "$$" ] + +updatePositions :: Char + -> OrgParser (Char) +updatePositions c = do + when (c `elem` emphasisPreChars) updateLastPreCharPos + when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos + return c + symbol :: OrgParser (F Inlines) symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) - where updatePositions c = do - when (c `elem` emphasisPreChars) updateLastPreCharPos - when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos - return c emphasisBetween :: Char -> OrgParser (F Inlines) @@ -1486,3 +1500,31 @@ inlineLaTeXCommand = try $ do count len anyChar return cs _ -> mzero + +smart :: OrgParser (F Inlines) +smart = do + getOption readerSmart >>= guard + doubleQuoted <|> singleQuoted <|> + choice (map (return <$>) [orgApostrophe, dash, ellipses]) + where orgApostrophe = + (char '\'' <|> char '\8217') <* updateLastPreCharPos + <* updateLastForbiddenCharPos + *> return (B.str "\x2019") + +singleQuoted :: OrgParser (F Inlines) +singleQuoted = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ + fmap B.singleQuoted . trimInlinesF . mconcat <$> + many1Till inline singleQuoteEnd + +-- doubleQuoted will handle regular double-quoted sections, as well +-- as dialogues with an open double-quote without a close double-quote +-- in the same paragraph. +doubleQuoted :: OrgParser (F Inlines) +doubleQuoted = try $ do + doubleQuoteStart + contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) + (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return + (fmap B.doubleQuoted . trimInlinesF $ contents)) + <|> (return $ return (B.str "\8220") <> contents) diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 39c40cd45..c373d52cc 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -12,6 +12,9 @@ import Data.Monoid (mempty, mappend, mconcat) org :: String -> Pandoc org = readOrg def +orgSmart :: String -> Pandoc +orgSmart = readOrg def { readerSmart = True } + infix 4 =: (=:) :: ToString c => String -> (String, c) -> Test @@ -1152,4 +1155,25 @@ tests = ] in codeBlockWith ( "", classes, params) "code body\n" ] + , testGroup "Smart punctuation" + [ test orgSmart "quote before ellipses" + ("'...hi'" + =?> para (singleQuoted "…hi")) + + , test orgSmart "apostrophe before emph" + ("D'oh! A l'/aide/!" + =?> para ("D’oh! A l’" <> emph "aide" <> "!")) + + , test orgSmart "apostrophe in French" + ("À l'arrivée de la guerre, le thème de l'«impossibilité du socialisme»" + =?> para "À l’arrivée de la guerre, le thème de l’«impossibilité du socialisme»") + + , test orgSmart "Quotes cannot occur at the end of emphasized text" + ("/say \"yes\"/" =?> + para ("/say" <> space <> doubleQuoted "yes" <> "/")) + + , test orgSmart "Dashes are allowed at the borders of emphasis'" + ("/foo---/" =?> + para (emph "foo—")) + ] ] |