diff options
author | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2008-07-11 02:14:57 +0000 |
---|---|---|
committer | fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> | 2008-07-11 02:14:57 +0000 |
commit | 824bb2d22e40e035703ccf6ec7fd6bcde51950ce (patch) | |
tree | b47ff78e86ddcdea299932fc2e4be39a387aa435 /Text/Pandoc/Readers | |
parent | 8ed710bc9d771a25e73be2582b379485a3e240dc (diff) | |
download | pandoc-824bb2d22e40e035703ccf6ec7fd6bcde51950ce.tar.gz |
In smart mode, use nonbreaking spaces after abbreviations in markdown parser.
Thus, for example, "Mr. Brown" comes out as "Mr.~Brown" in LaTeX, and does
not produce a sentence-separating space. Resolves Issue #75.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@1298 788f1e2b-df1e-0410-8736-df70ead52e1b
Diffstat (limited to 'Text/Pandoc/Readers')
-rw-r--r-- | Text/Pandoc/Readers/Markdown.hs | 25 |
1 files changed, 23 insertions, 2 deletions
diff --git a/Text/Pandoc/Readers/Markdown.hs b/Text/Pandoc/Readers/Markdown.hs
index c9fbbe2d9..d0d99b607 100644
--- a/Text/Pandoc/Readers/Markdown.hs
+++ b/Text/Pandoc/Readers/Markdown.hs
@@ -33,7 +33,7 @@ module Text.Pandoc.Readers.Markdown (
 
 import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex )
 import Data.Ord ( comparing )
-import Data.Char ( isAlphaNum )
+import Data.Char ( isAlphaNum, isAlpha, isLower, isDigit )
 import Data.Maybe ( fromMaybe )
 import Text.Pandoc.Definition
 import Text.Pandoc.Shared
@@ -697,7 +697,8 @@ table = simpleTable <|> multilineTable <?> "table"
 
 inline = choice inlineParsers <?> "inline"
 
-inlineParsers = [ str
+inlineParsers = [ abbrev
+                , str
                 , smartPunctuation
                 , whitespace
                 , endline
@@ -792,6 +793,26 @@ subscript = failIfStrict >> enclosed (char '~') (char '~')
             (notFollowedBy' whitespace >> inline) >>= -- may not contain Space
             return . Subscript
 
+abbrev = failUnlessSmart >>
+         (assumedAbbrev <|> knownAbbrev) >>= return . Str . (++ ".\160")
+
+-- a string of letters followed by a period and a space, where the next
+-- character is a lowercase letter or a digit, is assumed to be an abbreviation
+-- (it is assumed that sentences don't start with lowercase letters or numerals).
+assumedAbbrev = try $ do
+  result <- many1 $ satisfy isAlpha
+  string ". "
+  lookAhead $ satisfy (\x -> isLower x || isDigit x)
+  return result
+
+-- known abbreviations, accepted even before a capital letter (such as a name).
+-- "Mrs" precedes "Mr" so an in-order oneOfStrings cannot stop at the prefix.
+knownAbbrev = try $ do
+  result <- oneOfStrings [ "Mrs", "Mr", "Ms", "Capt", "Dr", "Prof", "Gen",
+                           "Gov", "e.g", "i.e", "Sgt", "St", "vol", "vs" ]
+  string ". "
+  return result
+
 smartPunctuation = failUnlessSmart >>
                    choice [ quoted, apostrophe, dash, ellipses ]
 