In smart mode, use nonbreaking spaces after abbreviations in markdown parser.

Thus, for example, "Mr. Brown" comes out as "Mr.~Brown" in LaTeX, and does not produce a sentence-separating space. Resolves Issue #75.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1298 788f1e2b-df1e-0410-8736-df70ead52e1b
author: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2008-07-11 02:14:57 +0000
committer: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2008-07-11 02:14:57 +0000
commit: 824bb2d22e40e035703ccf6ec7fd6bcde51950ce (patch)
tree: b47ff78e86ddcdea299932fc2e4be39a387aa435 /Text/Pandoc/Readers
parent: 8ed710bc9d771a25e73be2582b379485a3e240dc (diff)
download: pandoc-824bb2d22e40e035703ccf6ec7fd6bcde51950ce.tar.gz
1 files changed, 23 insertions, 2 deletions
diff --git a/Text/Pandoc/Readers/Markdown.hs b/Text/Pandoc/Readers/Markdown.hs
index c9fbbe2d9..d0d99b607 100644
--- a/Text/Pandoc/Readers/Markdown.hs
+++ b/Text/Pandoc/Readers/Markdown.hs
@@ -33,7 +33,7 @@ module Text.Pandoc.Readers.Markdown (
 
 import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex )
 import Data.Ord ( comparing )
-import Data.Char ( isAlphaNum )
+import Data.Char ( isAlphaNum, isAlpha, isLower, isDigit )
 import Data.Maybe ( fromMaybe )
 import Text.Pandoc.Definition
 import Text.Pandoc.Shared 
@@ -697,7 +697,8 @@ table = simpleTable <|> multilineTable <?> "table"
 
 inline = choice inlineParsers <?> "inline"
 
-inlineParsers = [ str
+inlineParsers = [ abbrev
+                , str
                 , smartPunctuation
                 , whitespace
                 , endline
@@ -792,6 +793,26 @@ subscript = failIfStrict >> enclosed (char '~') (char '~')
             (notFollowedBy' whitespace >> inline) >>=  -- may not contain Space
             return . Subscript 
 
+abbrev = failUnlessSmart >>  -- yields Str of the abbreviation ++ ".\160"
+         (assumedAbbrev <|> knownAbbrev) >>= return . Str . (++ ".\160")
+
+-- A string of letters followed by a period and a space is assumed to be an
+-- abbreviation when the next character is a lowercase letter or a digit,
+-- on the assumption that sentences don't start with lowercase or numerals.
+assumedAbbrev = try $ do
+  result <- many1 $ satisfy isAlpha
+  string ". "
+  lookAhead $ satisfy (\x -> isLower x || isDigit x)
+  return result
+
+-- These strings are treated as abbreviations even if followed by a capital
+-- letter (e.g. a name).  "Mrs" precedes "Mr" in case alternatives are ordered.
+knownAbbrev = try $ do
+  result <- oneOfStrings [ "Mrs", "Mr", "Ms", "Capt", "Dr", "Prof", "Gen",
+                           "Gov", "e.g", "i.e", "Sgt", "St", "vol", "vs" ]
+  string ". "
+  return result
+  
 smartPunctuation = failUnlessSmart >> 
                    choice [ quoted, apostrophe, dash, ellipses ]
author	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2008-07-11 02:14:57 +0000
committer	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2008-07-11 02:14:57 +0000
commit	824bb2d22e40e035703ccf6ec7fd6bcde51950ce (patch)
tree	b47ff78e86ddcdea299932fc2e4be39a387aa435 /Text/Pandoc/Readers
parent	8ed710bc9d771a25e73be2582b379485a3e240dc (diff)
download	pandoc-824bb2d22e40e035703ccf6ec7fd6bcde51950ce.tar.gz