aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <[email protected]> 2022-10-16 14:49:54 -0700
committer John MacFarlane <[email protected]> 2022-10-16 14:59:28 -0700
commit abb7aab75f241a70b11365d52fae0dffa77b077f (patch)
tree 6f48c3828b23b2bf4d5fea6993a8628fbd7c0dc9
parent fa450607a832c29aa24ff57dba83c7491c83feeb (diff)
RST writer: improve inline escaping rules.
Also a small performance optimization. Closes #8380.
-rw-r--r-- src/Text/Pandoc/Writers/RST.hs 80
-rw-r--r-- test/command/8380.md 31
2 files changed, 91 insertions, 20 deletions
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 1745561d2..2237294bd 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -15,7 +15,10 @@ reStructuredText: <http://docutils.sourceforge.net/rst.html>
-}
module Text.Pandoc.Writers.RST ( writeRST, flatten ) where
import Control.Monad.State.Strict
-import Data.Char (isSpace)
+import Data.Char (isSpace, generalCategory, isAscii, isAlphaNum,
+ GeneralCategory(
+ ClosePunctuation, OpenPunctuation, InitialQuote,
+ FinalQuote, DashPunctuation, OtherPunctuation))
import Data.List (transpose, intersperse, foldl')
import qualified Data.List.NonEmpty as NE
import Data.Maybe (fromMaybe)
@@ -160,25 +163,62 @@ pictToRST (label, (attr, src, _, mbtarget)) = do
-- | Escape special characters for RST.
escapeText :: WriterOptions -> Text -> Text
-escapeText o = T.pack . escapeString' True o . T.unpack -- This ought to be parser
- where
- escapeString' _ _ [] = []
- escapeString' firstChar opts (c:cs) =
- case c of
- '\\' -> '\\':c:escapeString' False opts cs
- _ | c `elemText` "`*_|" &&
- (firstChar || null cs) -> '\\':c:escapeString' False opts cs
- '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString' False opts cs
- '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString' False opts cs
- '-' | isEnabled Ext_smart opts ->
- case cs of
- '-':_ -> '\\':'-':escapeString' False opts cs
- _ -> '-':escapeString' False opts cs
- '.' | isEnabled Ext_smart opts ->
- case cs of
- '.':'.':rest -> '\\':'.':'.':'.':escapeString' False opts rest
- _ -> '.':escapeString' False opts cs
- _ -> c : escapeString' False opts cs
+escapeText opts t =
+  if T.any isSpecial t
+     then T.pack . escapeString' True . T.unpack $ t
+     else t -- optimization: nothing to escape, so return the input unchanged
+  where
+    isSmart = isEnabled Ext_smart opts
+    isSpecial c = c == '\\' || c == '_' || c == '`' || c == '*' || c == '|'
+      || (isSmart && (c == '-' || c == '.' || c == '"' || c == '\''))
+    canFollowInlineMarkup c = c == '-' || c == '.' || c == ',' || c == ':'
+      || c == ';' || c == '!' || c == '?' || c == '\''
+      || c == '"' || c == ')' || c == ']' || c == '}'
+      || c == '>' || isSpace c
+      || (not (isAscii c) && -- non-ASCII: closing punctuation and delimiters
+           generalCategory c `elem`
+             [ClosePunctuation, InitialQuote, FinalQuote,
+              DashPunctuation, OtherPunctuation])
+    canPrecedeInlineMarkup c = c == '-' || c == ':' || c == '/' || c == '\''
+      || c == '"' || c == '<' || c == '(' || c == '['
+      || c == '{' || isSpace c
+      || (not (isAscii c) && -- non-ASCII: opening punctuation and delimiters
+           generalCategory c `elem`
+             [OpenPunctuation, InitialQuote, FinalQuote,
+              DashPunctuation, OtherPunctuation])
+    escapeString' canStart cs = -- canStart: previous char lets markup start here
+      case cs of
+        [] -> []
+        d:ds
+          | d == '\\' -- a literal backslash is always escaped
+          -> '\\' : d : escapeString' False ds
+        '\'':ds
+          | isSmart -- quotes are only special when Ext_smart is enabled
+          -> '\\' : '\'' : escapeString' True ds
+        '"':ds
+          | isSmart
+          -> '\\' : '"' : escapeString' True ds
+        '-':'-':ds
+          | isSmart -- escape the first dash of a "--" run
+          -> '\\' : '-' : escapeString' False ('-':ds)
+        '.':'.':'.':ds
+          | isSmart -- escape the first dot of a "..." run
+          -> '\\' : '.' : escapeString' False ('.':'.':ds)
+        e:[]
+          | e == '*' || e == '_' || e == '|' || e == '`' -- markup char at the end
+          -> ['\\',e]
+        d:ds
+          | canPrecedeInlineMarkup d -- the next character may open inline markup
+          -> d : escapeString' True ds
+        e:d:ds
+          | e == '*' || e == '_' || e == '|' || e == '`'
+          , (not canStart && canFollowInlineMarkup d) -- could close inline markup
+            || (canStart && not (isSpace d)) -- could open inline markup
+          -> '\\' : e : escapeString' False (d:ds)
+        '_':d:ds
+          | not (isAlphaNum d) -- underscore before a non-alphanumeric character
+          -> '\\' : '_' : escapeString' False (d:ds)
+        d:ds -> d : escapeString' False ds
titleToRST :: PandocMonad m => [Inline] -> [Inline] -> RST m (Doc Text)
titleToRST [] _ = return empty
diff --git a/test/command/8380.md b/test/command/8380.md
new file mode 100644
index 000000000..116922c85
--- /dev/null
+++ b/test/command/8380.md
@@ -0,0 +1,31 @@
+```
+% pandoc -f man -t rst
+LC_*
+^D
+LC\_\*
+```
+
+These examples of things that don't require escaping are taken
+from the RST documentation:
+
+```
+% pandoc -f native -t rst
+[Para [Str "2*x a**b O(N**2) e**(x*y) f(x)*f(y) a|b"]
+,Para [Str "a**b O(N**2) e**(x*y) f(x)*f(y)"]
+]
+^D
+2*x a**b O(N**2) e**(x*y) f(x)*f(y) a|b
+
+a**b O(N**2) e**(x*y) f(x)*f(y)
+```
+
+These examples of things that do require escaping are taken
+from the RST documentation:
+
+```
+% pandoc -f native -t rst
+Str "*4, class_, *args, **kwargs, `TeX-quoted', *ML, *.txt"
+^D
+\*4, class\_, \*args, \**kwargs, \`TeX-quoted', \*ML, \*.txt
+```
+