diff options
author | John MacFarlane <jgm@berkeley.edu> | 2017-03-05 10:24:39 +0100 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2017-03-05 10:24:39 +0100 |
commit | 95f2726ee74a7770bf0eb5fe5629c493addbe298 (patch) | |
tree | 9970c4febe033118e910e465e94aca9c41f34771 /src/Text | |
parent | 7fc6919f9039a3c71028b807d4372f18cf35bee8 (diff) | |
download | pandoc-95f2726ee74a7770bf0eb5fe5629c493addbe298.tar.gz |
Added readerAbbreviations to ParserState.
Markdown reader now consults this to determine what is an
abbreviation.
Eventually it will be possible to specify a custom list
(see #256).
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/Options.hs | 11 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 41 |
2 files changed, 27 insertions, 25 deletions
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 6db53c3dc..0379b0ddf 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -47,6 +47,7 @@ module Text.Pandoc.Options ( module Text.Pandoc.Extensions ) where import Data.Data (Data) import Data.Default +import qualified Data.Set as Set import Data.Typeable (Typeable) import GHC.Generics (Generic) import Text.Pandoc.Extensions @@ -60,6 +61,7 @@ data ReaderOptions = ReaderOptions{ , readerApplyMacros :: Bool -- ^ Apply macros to TeX math , readerIndentedCodeClasses :: [String] -- ^ Default classes for -- indented code blocks + , readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations , readerDefaultImageExtension :: String -- ^ Default extension for images , readerTrackChanges :: TrackChanges } deriving (Show, Read, Data, Typeable, Generic) @@ -72,10 +74,19 @@ instance Default ReaderOptions , readerTabStop = 4 , readerApplyMacros = True , readerIndentedCodeClasses = [] + , readerAbbreviations = defaultAbbrevs , readerDefaultImageExtension = "" , readerTrackChanges = AcceptChanges } +defaultAbbrevs :: Set.Set String +defaultAbbrevs = Set.fromList + [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.", + "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.", + "vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.", + "Rev.", "Ph.D.", "M.D.", "M.A.", "p.", "pp.", + "ch.", "sec.", "cf.", "cp."] + -- -- Writer options -- diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 4790f83ff..abaa907e0 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -42,6 +42,7 @@ import Data.Maybe import Data.Monoid ((<>)) import Data.Ord (comparing) import Data.Scientific (base10Exponent, coefficient) +import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import qualified Data.Vector as V @@ -1688,32 +1689,22 @@ nonEndline = satisfy (/='\n') str :: PandocMonad m => MarkdownParser m (F Inlines) str = do - result <- many1 alphaNum + result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.'))) updateLastStrPos - let spacesToNbr = map (\c -> if c == ' ' then '\160' else c) - isSmart <- extensionEnabled Ext_smart <$> getOption readerExtensions - if isSmart - then case likelyAbbrev result of - [] -> return $ return $ B.str result - xs -> choice (map (\x -> - try (string x >> oneOf " \n" >> - lookAhead alphaNum >> - return (return $ B.str - $ result ++ spacesToNbr x ++ "\160"))) xs) - <|> (return $ return $ B.str result) - else return $ return $ B.str result - --- | if the string matches the beginning of an abbreviation (before --- the first period, return strings that would finish the abbreviation. -likelyAbbrev :: String -> [String] -likelyAbbrev x = - let abbrevs = [ "Mr.", "Mrs.", "Ms.", "Capt.", "Dr.", "Prof.", - "Gen.", "Gov.", "e.g.", "i.e.", "Sgt.", "St.", - "vol.", "vs.", "Sen.", "Rep.", "Pres.", "Hon.", - "Rev.", "Ph.D.", "M.D.", "M.A.", "p.", "pp.", - "ch.", "sec.", "cf.", "cp."] - abbrPairs = map (break (=='.')) abbrevs - in map snd $ filter (\(y,_) -> y == x) abbrPairs + (do guardEnabled Ext_smart + abbrevs <- getOption readerAbbreviations + if not (null result) && last result == '.' && result `Set.member` abbrevs + then try (do ils <- whitespace <|> endline + lookAhead alphaNum + return $ do + ils' <- ils + if ils' == B.space + then return (B.str result <> B.str "\160") + else -- linebreak or softbreak + return (ils' <> B.str result <> B.str "\160")) + <|> return (return (B.str result)) + else return (return (B.str result))) + <|> return (return (B.str result)) -- an endline character that can be treated as a space, not a structural break endline :: PandocMonad m => MarkdownParser m (F Inlines) |