aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-03-05 10:24:39 +0100
committerJohn MacFarlane <jgm@berkeley.edu>2017-03-05 10:24:39 +0100
commit95f2726ee74a7770bf0eb5fe5629c493addbe298 (patch)
tree9970c4febe033118e910e465e94aca9c41f34771 /src/Text
parent7fc6919f9039a3c71028b807d4372f18cf35bee8 (diff)
downloadpandoc-95f2726ee74a7770bf0eb5fe5629c493addbe298.tar.gz
Added readerAbbreviations to ParserState.
Markdown reader now consults this to determine what is an abbreviation. Eventually it will be possible to specify a custom list (see #256).
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Options.hs11
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs41
2 files changed, 27 insertions, 25 deletions
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 6db53c3dc..0379b0ddf 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -47,6 +47,7 @@ module Text.Pandoc.Options ( module Text.Pandoc.Extensions
) where
import Data.Data (Data)
import Data.Default
+import qualified Data.Set as Set
import Data.Typeable (Typeable)
import GHC.Generics (Generic)
import Text.Pandoc.Extensions
@@ -60,6 +61,7 @@ data ReaderOptions = ReaderOptions{
, readerApplyMacros :: Bool -- ^ Apply macros to TeX math
, readerIndentedCodeClasses :: [String] -- ^ Default classes for
-- indented code blocks
+ , readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations
, readerDefaultImageExtension :: String -- ^ Default extension for images
, readerTrackChanges :: TrackChanges
} deriving (Show, Read, Data, Typeable, Generic)
@@ -72,10 +74,19 @@ instance Default ReaderOptions
, readerTabStop = 4
, readerApplyMacros = True
, readerIndentedCodeClasses = []
+ , readerAbbreviations = defaultAbbrevs
, readerDefaultImageExtension = ""
, readerTrackChanges = AcceptChanges
}
+defaultAbbrevs :: Set.Set String
+defaultAbbrevs = Set.fromList
+ [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.",
+ "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.",
+ "vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.",
+ "Rev.", "Ph.D.", "M.D.", "M.A.", "p.", "pp.",
+ "ch.", "sec.", "cf.", "cp."]
+
--
-- Writer options
--
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 4790f83ff..abaa907e0 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -42,6 +42,7 @@ import Data.Maybe
import Data.Monoid ((<>))
import Data.Ord (comparing)
import Data.Scientific (base10Exponent, coefficient)
+import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Vector as V
@@ -1688,32 +1689,22 @@ nonEndline = satisfy (/='\n')
str :: PandocMonad m => MarkdownParser m (F Inlines)
str = do
- result <- many1 alphaNum
+ result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.')))
updateLastStrPos
- let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
- isSmart <- extensionEnabled Ext_smart <$> getOption readerExtensions
- if isSmart
- then case likelyAbbrev result of
- [] -> return $ return $ B.str result
- xs -> choice (map (\x ->
- try (string x >> oneOf " \n" >>
- lookAhead alphaNum >>
- return (return $ B.str
- $ result ++ spacesToNbr x ++ "\160"))) xs)
- <|> (return $ return $ B.str result)
- else return $ return $ B.str result
-
--- | if the string matches the beginning of an abbreviation (before
--- the first period, return strings that would finish the abbreviation.
-likelyAbbrev :: String -> [String]
-likelyAbbrev x =
- let abbrevs = [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.",
- "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.",
- "vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.",
- "Rev.", "Ph.D.", "M.D.", "M.A.", "p.", "pp.",
- "ch.", "sec.", "cf.", "cp."]
- abbrPairs = map (break (=='.')) abbrevs
- in map snd $ filter (\(y,_) -> y == x) abbrPairs
+ (do guardEnabled Ext_smart
+ abbrevs <- getOption readerAbbreviations
+ if not (null result) && last result == '.' && result `Set.member` abbrevs
+ then try (do ils <- whitespace <|> endline
+ lookAhead alphaNum
+ return $ do
+ ils' <- ils
+ if ils' == B.space
+ then return (B.str result <> B.str "\160")
+ else -- linebreak or softbreak
+ return (ils' <> B.str result <> B.str "\160"))
+ <|> return (return (B.str result))
+ else return (return (B.str result)))
+ <|> return (return (B.str result))
-- an endline character that can be treated as a space, not a structural break
endline :: PandocMonad m => MarkdownParser m (F Inlines)