aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <[email protected]> 2021-10-02 22:13:03 -0700
committer John MacFarlane <[email protected]> 2021-10-02 22:13:03 -0700
commit 4c092f4bae9caa5a49f6764613dd26dc4fa6c472 (patch)
tree a241843a3b8bbe930076451c82c85a0f7d4e467d
parent f58db1d7877eba75d0f22aaa060e88a1510c795c (diff)
Remove splitSentences from T.P.Shared [API change].
We used to attempt automatic sentence splitting in man and ms output, since sentence-ending periods need to be followed by two spaces or a newline in these formats. But it's just too difficult to do this reliably. Now that you can set `--wrap=preserve`, that option provides a better way to ensure that sentences end at the end of a line, if that's important to you.
-rw-r--r-- src/Text/Pandoc/Shared.hs 44
-rw-r--r-- src/Text/Pandoc/Writers/Man.hs 5
-rw-r--r-- src/Text/Pandoc/Writers/Ms.hs 5
3 files changed, 4 insertions, 50 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 6806a78c6..bb40ae2cc 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -68,7 +68,6 @@ module Text.Pandoc.Shared (
makeMeta,
eastAsianLineBreakFilter,
htmlSpanLikeElements,
- splitSentences,
filterIpynbOutput,
-- * TagSoup HTML handling
renderTags',
@@ -707,49 +706,6 @@ eastAsianLineBreakFilter = bottomUp go
htmlSpanLikeElements :: Set.Set T.Text
htmlSpanLikeElements = Set.fromList ["kbd", "mark", "dfn"]
--- | Returns the first sentence in a list of inlines, and the rest.
-breakSentence :: [Inline] -> ([Inline], [Inline])
-breakSentence xs =
- case break isStr xs of
- (ys, Str t:zs)
- | Just (t',t'') <- breakOnSentenceEnder t
- , not (T.null t'') || startsWithSpace zs
- -> (ys ++ [Str t'],
- case T.stripStart t'' of
- t''' | T.null t''' -> zs
- | otherwise -> Str t''' : zs)
- | otherwise ->
- let (as, bs) = breakSentence zs
- in (ys ++ Str t : as, bs)
- _ -> (xs, [])
- where
- isStr (Str _) = True
- isStr _ = False
- breakOnSentenceEnder t = -- ". " ".)" "? " "?)" ".$" "! " " !)"
- let (x,y) = T.break isSentenceEndPunct t
- in if T.null y
- then Nothing
- else case T.uncons (T.drop 1 y) of
- Nothing -> Just (t, mempty)
- Just (c,_) | c == ' ' || c == '\r' || c == '\n' || c == ')'
- -> Just (x <> T.take 1 y, T.drop 1 y)
- _ -> (\(w,z) -> (x <> T.take 1 y <> w, z)) <$>
- breakOnSentenceEnder (T.drop 1 y)
- isSentenceEndPunct '.' = True
- isSentenceEndPunct '?' = True
- isSentenceEndPunct '!' = True
- isSentenceEndPunct _ = False
- startsWithSpace (LineBreak:_) = True
- startsWithSpace (SoftBreak:_) = True
- startsWithSpace [] = True
- startsWithSpace _ = False
-
--- | Split a list of inlines into sentences.
-splitSentences :: [Inline] -> [[Inline]]
-splitSentences xs =
- let (sent, rest) = breakSentence xs
- in if null rest then [sent] else sent : splitSentences rest
-
-- | Process ipynb output cells. If mode is Nothing,
-- remove all output. If mode is Just format, select
-- best output for the format. If format is not ipynb,
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index b3607cdd0..058273250 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -109,10 +109,9 @@ blockToMan :: PandocMonad m
blockToMan _ Null = return empty
blockToMan opts (Div _ bs) = blockListToMan opts bs
blockToMan opts (Plain inlines) =
- liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines
+ inlineListToMan opts inlines
blockToMan opts (Para inlines) = do
- contents <- liftM vcat $ mapM (inlineListToMan opts) $
- splitSentences inlines
+ contents <- inlineListToMan opts inlines
return $ text ".PP" $$ contents
blockToMan opts (LineBlock lns) =
blockToMan opts $ linesToPara lns
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index d32ed53dc..c090cd08c 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -143,7 +143,7 @@ blockToMs opts (Div (ident,cls,kvs) bs) = do
setFirstPara
return $ anchor $$ res
blockToMs opts (Plain inlines) =
- liftM vcat $ mapM (inlineListToMs' opts) $ splitSentences inlines
+ inlineListToMs' opts inlines
blockToMs opts (Para [Image attr alt (src,_tit)])
| let ext = takeExtension (T.unpack src) in (ext == ".ps" || ext == ".eps") = do
let (mbW,mbH) = (inPoints opts <$> dimension Width attr,
@@ -166,8 +166,7 @@ blockToMs opts (Para [Image attr alt (src,_tit)])
blockToMs opts (Para inlines) = do
firstPara <- gets stFirstPara
resetFirstPara
- contents <- liftM vcat $ mapM (inlineListToMs' opts) $
- splitSentences inlines
+ contents <- inlineListToMs' opts inlines
return $ literal (if firstPara then ".LP" else ".PP") $$ contents
blockToMs _ b@(RawBlock f str)
| f == Format "ms" = return $ literal str