diff options
| author | John MacFarlane <[email protected]> | 2021-10-02 22:13:03 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2021-10-02 22:13:03 -0700 |
| commit | 4c092f4bae9caa5a49f6764613dd26dc4fa6c472 (patch) | |
| tree | a241843a3b8bbe930076451c82c85a0f7d4e467d | |
| parent | f58db1d7877eba75d0f22aaa060e88a1510c795c (diff) | |
Remove splitSentences from Text.Pandoc.Shared [API change].
We used to attempt automatic sentence splitting in man and ms
output, since sentence-ending periods need to be followed by
two spaces or a newline in these formats.
But it's just too difficult to do this reliably.
Now that you can set `--wrap=preserve`, that option provides a
better way to ensure that sentences end at the end of a line,
if that's important to you.
| -rw-r--r-- | src/Text/Pandoc/Shared.hs | 44 |
| -rw-r--r-- | src/Text/Pandoc/Writers/Man.hs | 5 |
| -rw-r--r-- | src/Text/Pandoc/Writers/Ms.hs | 5 |
3 files changed, 4 insertions, 50 deletions
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 6806a78c6..bb40ae2cc 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -68,7 +68,6 @@ module Text.Pandoc.Shared ( makeMeta, eastAsianLineBreakFilter, htmlSpanLikeElements, - splitSentences, filterIpynbOutput, -- * TagSoup HTML handling renderTags', @@ -707,49 +706,6 @@ eastAsianLineBreakFilter = bottomUp go htmlSpanLikeElements :: Set.Set T.Text htmlSpanLikeElements = Set.fromList ["kbd", "mark", "dfn"] --- | Returns the first sentence in a list of inlines, and the rest. -breakSentence :: [Inline] -> ([Inline], [Inline]) -breakSentence xs = - case break isStr xs of - (ys, Str t:zs) - | Just (t',t'') <- breakOnSentenceEnder t - , not (T.null t'') || startsWithSpace zs - -> (ys ++ [Str t'], - case T.stripStart t'' of - t''' | T.null t''' -> zs - | otherwise -> Str t''' : zs) - | otherwise -> - let (as, bs) = breakSentence zs - in (ys ++ Str t : as, bs) - _ -> (xs, []) - where - isStr (Str _) = True - isStr _ = False - breakOnSentenceEnder t = -- ". " ".)" "? " "?)" ".$" "! " " !)" - let (x,y) = T.break isSentenceEndPunct t - in if T.null y - then Nothing - else case T.uncons (T.drop 1 y) of - Nothing -> Just (t, mempty) - Just (c,_) | c == ' ' || c == '\r' || c == '\n' || c == ')' - -> Just (x <> T.take 1 y, T.drop 1 y) - _ -> (\(w,z) -> (x <> T.take 1 y <> w, z)) <$> - breakOnSentenceEnder (T.drop 1 y) - isSentenceEndPunct '.' = True - isSentenceEndPunct '?' = True - isSentenceEndPunct '!' = True - isSentenceEndPunct _ = False - startsWithSpace (LineBreak:_) = True - startsWithSpace (SoftBreak:_) = True - startsWithSpace [] = True - startsWithSpace _ = False - --- | Split a list of inlines into sentences. -splitSentences :: [Inline] -> [[Inline]] -splitSentences xs = - let (sent, rest) = breakSentence xs - in if null rest then [sent] else sent : splitSentences rest - -- | Process ipynb output cells. If mode is Nothing, -- remove all output. 
If mode is Just format, select -- best output for the format. If format is not ipynb, diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index b3607cdd0..058273250 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -109,10 +109,9 @@ blockToMan :: PandocMonad m blockToMan _ Null = return empty blockToMan opts (Div _ bs) = blockListToMan opts bs blockToMan opts (Plain inlines) = - liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines + inlineListToMan opts inlines blockToMan opts (Para inlines) = do - contents <- liftM vcat $ mapM (inlineListToMan opts) $ - splitSentences inlines + contents <- inlineListToMan opts inlines return $ text ".PP" $$ contents blockToMan opts (LineBlock lns) = blockToMan opts $ linesToPara lns diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index d32ed53dc..c090cd08c 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -143,7 +143,7 @@ blockToMs opts (Div (ident,cls,kvs) bs) = do setFirstPara return $ anchor $$ res blockToMs opts (Plain inlines) = - liftM vcat $ mapM (inlineListToMs' opts) $ splitSentences inlines + inlineListToMs' opts inlines blockToMs opts (Para [Image attr alt (src,_tit)]) | let ext = takeExtension (T.unpack src) in (ext == ".ps" || ext == ".eps") = do let (mbW,mbH) = (inPoints opts <$> dimension Width attr, @@ -166,8 +166,7 @@ blockToMs opts (Para [Image attr alt (src,_tit)]) blockToMs opts (Para inlines) = do firstPara <- gets stFirstPara resetFirstPara - contents <- liftM vcat $ mapM (inlineListToMs' opts) $ - splitSentences inlines + contents <- inlineListToMs' opts inlines return $ literal (if firstPara then ".LP" else ".PP") $$ contents blockToMs _ b@(RawBlock f str) | f == Format "ms" = return $ literal str |
