From 2befeaa29f9589f81a3d6cd394d88f29089e7338 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 2 Oct 2021 22:13:03 -0700 Subject: Remove splitSentences from T.P.Shared [API change]. We used to attempt automatic sentence splitting in man and ms output, since sentence-ending periods need to be followed by two spaces or a newline in these formats. But it's difficult to do this reliably at the level of `[Inline]`. --- src/Text/Pandoc/Shared.hs | 28 ---------------------------- src/Text/Pandoc/Writers/Man.hs | 5 ++--- src/Text/Pandoc/Writers/Ms.hs | 5 ++--- 3 files changed, 4 insertions(+), 34 deletions(-) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 7bb830d0e..6e1f29fb1 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -68,7 +68,6 @@ module Text.Pandoc.Shared ( makeMeta, eastAsianLineBreakFilter, htmlSpanLikeElements, - splitSentences, filterIpynbOutput, -- * TagSoup HTML handling renderTags', @@ -709,33 +708,6 @@ eastAsianLineBreakFilter = bottomUp go htmlSpanLikeElements :: Set.Set T.Text htmlSpanLikeElements = Set.fromList ["kbd", "mark", "dfn"] --- | Returns the first sentence in a list of inlines, and the rest. -breakSentence :: [Inline] -> ([Inline], [Inline]) -breakSentence [] = ([],[]) -breakSentence xs = - let isSentenceEndInline (Str ys) - | Just (_, c) <- T.unsnoc ys = c == '.' || c == '?' - isSentenceEndInline LineBreak = True - isSentenceEndInline _ = False - (as, bs) = break isSentenceEndInline xs - in case bs of - [] -> (as, []) - [c] -> (as ++ [c], []) - (c:Space:cs) -> (as ++ [c], cs) - (c:SoftBreak:cs) -> (as ++ [c], cs) - (Str ".":Str s@(T.uncons -> Just (')',_)):cs) - -> (as ++ [Str ".", Str s], cs) - (x@(Str (T.stripPrefix ".)" -> Just _)):cs) -> (as ++ [x], cs) - (LineBreak:x@(Str (T.uncons -> Just ('.',_))):cs) -> (as ++[LineBreak], x:cs) - (c:cs) -> (as ++ [c] ++ ds, es) - where (ds, es) = breakSentence cs - --- | Split a list of inlines into sentences. -splitSentences :: [Inline] -> [[Inline]] -splitSentences xs = - let (sent, rest) = breakSentence xs - in if null rest then [sent] else sent : splitSentences rest - -- | Process ipynb output cells. If mode is Nothing, -- remove all output. If mode is Just format, select -- best output for the format. If format is not ipynb, diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 45516ea06..89789d3c5 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -108,10 +108,9 @@ blockToMan :: PandocMonad m -> StateT WriterState m (Doc Text) blockToMan opts (Div _ bs) = blockListToMan opts bs blockToMan opts (Plain inlines) = - liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines + inlineListToMan opts inlines blockToMan opts (Para inlines) = do - contents <- liftM vcat $ mapM (inlineListToMan opts) $ - splitSentences inlines + contents <- inlineListToMan opts inlines return $ text ".PP" $$ contents blockToMan opts (LineBlock lns) = blockToMan opts $ linesToPara lns diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 055324448..719407ac1 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -142,7 +142,7 @@ blockToMs opts (Div (ident,cls,kvs) bs) = do setFirstPara return $ anchor $$ res blockToMs opts (Plain inlines) = - liftM vcat $ mapM (inlineListToMs' opts) $ splitSentences inlines + inlineListToMs' opts inlines blockToMs opts (Para [Image attr alt (src,_tit)]) | let ext = takeExtension (T.unpack src) in (ext == ".ps" || ext == ".eps") = do let (mbW,mbH) = (inPoints opts <$> dimension Width attr, @@ -165,8 +165,7 @@ blockToMs opts (Para [Image attr alt (src,_tit)]) blockToMs opts (Para inlines) = do firstPara <- gets stFirstPara resetFirstPara - contents <- liftM vcat $ mapM (inlineListToMs' opts) $ - splitSentences inlines + contents <- inlineListToMs' opts inlines return $ literal (if firstPara then ".LP" else ".PP") $$ contents blockToMs _ b@(RawBlock f str) | f == Format "ms" = return $ literal str -- cgit v1.2.3