diff options
| author | John MacFarlane <[email protected]> | 2022-10-16 14:49:54 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2022-10-16 14:59:28 -0700 |
| commit | abb7aab75f241a70b11365d52fae0dffa77b077f (patch) | |
| tree | 6f48c3828b23b2bf4d5fea6993a8628fbd7c0dc9 | |
| parent | fa450607a832c29aa24ff57dba83c7491c83feeb (diff) | |
RST writer: improve inline escaping rules.
Also a small performance optimization.
Closes #8380.
| -rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 80 | ||||
| -rw-r--r-- | test/command/8380.md | 31 |
2 files changed, 91 insertions, 20 deletions
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 1745561d2..2237294bd 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -15,7 +15,10 @@ reStructuredText: <http://docutils.sourceforge.net/rst.html> -} module Text.Pandoc.Writers.RST ( writeRST, flatten ) where import Control.Monad.State.Strict -import Data.Char (isSpace) +import Data.Char (isSpace, generalCategory, isAscii, isAlphaNum, + GeneralCategory( + ClosePunctuation, OpenPunctuation, InitialQuote, + FinalQuote, DashPunctuation, OtherPunctuation)) import Data.List (transpose, intersperse, foldl') import qualified Data.List.NonEmpty as NE import Data.Maybe (fromMaybe) @@ -160,25 +163,62 @@ pictToRST (label, (attr, src, _, mbtarget)) = do -- | Escape special characters for RST. escapeText :: WriterOptions -> Text -> Text -escapeText o = T.pack . escapeString' True o . T.unpack -- This ought to be parser - where - escapeString' _ _ [] = [] - escapeString' firstChar opts (c:cs) = - case c of - '\\' -> '\\':c:escapeString' False opts cs - _ | c `elemText` "`*_|" && - (firstChar || null cs) -> '\\':c:escapeString' False opts cs - '\'' | isEnabled Ext_smart opts -> '\\':'\'':escapeString' False opts cs - '"' | isEnabled Ext_smart opts -> '\\':'"':escapeString' False opts cs - '-' | isEnabled Ext_smart opts -> - case cs of - '-':_ -> '\\':'-':escapeString' False opts cs - _ -> '-':escapeString' False opts cs - '.' | isEnabled Ext_smart opts -> - case cs of - '.':'.':rest -> '\\':'.':'.':'.':escapeString' False opts rest - _ -> '.':escapeString' False opts cs - _ -> c : escapeString' False opts cs +escapeText opts t = + if T.any isSpecial t + then T.pack . escapeString' True . T.unpack $ t + else t -- optimization + where + isSmart = isEnabled Ext_smart opts + isSpecial c = c == '\\' || c == '_' || c == '`' || c == '*' || c == '|' + || (isSmart && (c == '-' || c == '.' || c == '"' || c == '\'')) + canFollowInlineMarkup c = c == '-' || c == '.' || c == ',' || c == ':' + || c == ';' || c == '!' || c == '?' || c == '\'' + || c == '"' || c == ')' || c == ']' || c == '}' + || c == '>' || isSpace c + || (not (isAscii c) && + generalCategory c `elem` + [OpenPunctuation, InitialQuote, FinalQuote, + DashPunctuation, OtherPunctuation]) + canPrecedeInlineMarkup c = c == '-' || c == ':' || c == '/' || c == '\'' + || c == '"' || c == '<' || c == '(' || c == '[' + || c == '{' || isSpace c + || (not (isAscii c) && + generalCategory c `elem` + [ClosePunctuation, InitialQuote, FinalQuote, + DashPunctuation, OtherPunctuation]) + escapeString' canStart cs = + case cs of + [] -> [] + d:ds + | d == '\\' + -> '\\' : d : escapeString' False ds + '\'':ds + | isSmart + -> '\\' : '\'' : escapeString' True ds + '"':ds + | isSmart + -> '\\' : '"' : escapeString' True ds + '-':'-':ds + | isSmart + -> '\\' : '-' : escapeString' False ('-':ds) + '.':'.':'.':ds + | isSmart + -> '\\' : '.' : escapeString' False ('.':'.':ds) + e:[] + | e == '*' || e == '_' || e == '|' || e == '`' + -> ['\\',e] + d:ds + | canPrecedeInlineMarkup d + -> d : escapeString' True ds + e:d:ds + | e == '*' || e == '_' || e == '|' || e == '`' + , (not canStart && canFollowInlineMarkup d) + || (canStart && not (isSpace d)) + -> '\\' : e : escapeString' False (d:ds) + '_':d:ds + | not (isAlphaNum d) + -> '\\' : '_' : escapeString' False (d:ds) + d:ds -> d : escapeString' False ds titleToRST :: PandocMonad m => [Inline] -> [Inline] -> RST m (Doc Text) titleToRST [] _ = return empty diff --git a/test/command/8380.md b/test/command/8380.md new file mode 100644 index 000000000..116922c85 --- /dev/null +++ b/test/command/8380.md @@ -0,0 +1,31 @@ +``` +% pandoc -f man -t rst +LC_* +^D +LC\_\* +``` + +These examples of things that don't require escaping are taken +from the RST documentation: + +``` +% pandoc -f native -t rst +[Para [Str "2*x a**b O(N**2) e**(x*y) f(x)*f(y) a|b"] +,Para [Str "a**b O(N**2) e**(x*y) f(x)*f(y)"] +] +^D +2*x a**b O(N**2) e**(x*y) f(x)*f(y) a|b + +a**b O(N**2) e**(x*y) f(x)*f(y) +``` + +These examples of things that do require escaping are taken +from the RST documentation: + +``` +% pandoc -f native -t rst +Str "*4, class_, *args, **kwargs, `TeX-quoted', *ML, *.txt" +^D +\*4, class\_, \*args, \**kwargs, \`TeX-quoted', \*ML, \*.txt +``` + |
