aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2021-10-02 22:13:03 -0700
committer John MacFarlane <jgm@berkeley.edu> 2021-10-11 09:35:50 -0700
commit 2befeaa29f9589f81a3d6cd394d88f29089e7338 (patch)
tree a8fe6a833a73fa7f3c53ba6f998a89981d2b7e8b
parent 63ea754b49612bb89317b3aa30b756a7cd52d2ac (diff)
download pandoc-2befeaa29f9589f81a3d6cd394d88f29089e7338.tar.gz
Remove splitSentences from T.P.Shared [API change].
We used to attempt automatic sentence splitting in man and ms output, since sentence-ending periods need to be followed by two spaces or a newline in these formats. But it's difficult to do this reliably at the level of `[Inline]`.
-rw-r--r-- src/Text/Pandoc/Shared.hs 28
-rw-r--r-- src/Text/Pandoc/Writers/Man.hs 5
-rw-r--r-- src/Text/Pandoc/Writers/Ms.hs 5
3 files changed, 4 insertions, 34 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 7bb830d0e..6e1f29fb1 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -68,7 +68,6 @@ module Text.Pandoc.Shared (
makeMeta,
eastAsianLineBreakFilter,
htmlSpanLikeElements,
- splitSentences,
filterIpynbOutput,
-- * TagSoup HTML handling
renderTags',
@@ -709,33 +708,6 @@ eastAsianLineBreakFilter = bottomUp go
htmlSpanLikeElements :: Set.Set T.Text
htmlSpanLikeElements = Set.fromList ["kbd", "mark", "dfn"]
--- | Returns the first sentence in a list of inlines, and the rest.
-breakSentence :: [Inline] -> ([Inline], [Inline])
-breakSentence [] = ([],[])
-breakSentence xs =
- let isSentenceEndInline (Str ys)
- | Just (_, c) <- T.unsnoc ys = c == '.' || c == '?'
- isSentenceEndInline LineBreak = True
- isSentenceEndInline _ = False
- (as, bs) = break isSentenceEndInline xs
- in case bs of
- [] -> (as, [])
- [c] -> (as ++ [c], [])
- (c:Space:cs) -> (as ++ [c], cs)
- (c:SoftBreak:cs) -> (as ++ [c], cs)
- (Str ".":Str s@(T.uncons -> Just (')',_)):cs)
- -> (as ++ [Str ".", Str s], cs)
- (x@(Str (T.stripPrefix ".)" -> Just _)):cs) -> (as ++ [x], cs)
- (LineBreak:x@(Str (T.uncons -> Just ('.',_))):cs) -> (as ++[LineBreak], x:cs)
- (c:cs) -> (as ++ [c] ++ ds, es)
- where (ds, es) = breakSentence cs
-
--- | Split a list of inlines into sentences.
-splitSentences :: [Inline] -> [[Inline]]
-splitSentences xs =
- let (sent, rest) = breakSentence xs
- in if null rest then [sent] else sent : splitSentences rest
-
-- | Process ipynb output cells. If mode is Nothing,
-- remove all output. If mode is Just format, select
-- best output for the format. If format is not ipynb,
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 45516ea06..89789d3c5 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -108,10 +108,9 @@ blockToMan :: PandocMonad m
-> StateT WriterState m (Doc Text)
blockToMan opts (Div _ bs) = blockListToMan opts bs
blockToMan opts (Plain inlines) =
- liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines
+ inlineListToMan opts inlines
blockToMan opts (Para inlines) = do
- contents <- liftM vcat $ mapM (inlineListToMan opts) $
- splitSentences inlines
+ contents <- inlineListToMan opts inlines
return $ text ".PP" $$ contents
blockToMan opts (LineBlock lns) =
blockToMan opts $ linesToPara lns
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index 055324448..719407ac1 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -142,7 +142,7 @@ blockToMs opts (Div (ident,cls,kvs) bs) = do
setFirstPara
return $ anchor $$ res
blockToMs opts (Plain inlines) =
- liftM vcat $ mapM (inlineListToMs' opts) $ splitSentences inlines
+ inlineListToMs' opts inlines
blockToMs opts (Para [Image attr alt (src,_tit)])
| let ext = takeExtension (T.unpack src) in (ext == ".ps" || ext == ".eps") = do
let (mbW,mbH) = (inPoints opts <$> dimension Width attr,
@@ -165,8 +165,7 @@ blockToMs opts (Para [Image attr alt (src,_tit)])
blockToMs opts (Para inlines) = do
firstPara <- gets stFirstPara
resetFirstPara
- contents <- liftM vcat $ mapM (inlineListToMs' opts) $
- splitSentences inlines
+ contents <- inlineListToMs' opts inlines
return $ literal (if firstPara then ".LP" else ".PP") $$ contents
blockToMs _ b@(RawBlock f str)
| f == Format "ms" = return $ literal str