diff options
| author | John MacFarlane <[email protected]> | 2023-01-05 21:14:02 -0800 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2023-01-05 21:14:02 -0800 |
| commit | 0d891afab65b6928ace302d1a110ea9303c4abfb (patch) | |
| tree | a32f0e0c624e8b5f1f56f7a66b150a273823505c /src | |
| parent | 7a82686adcf6efd68b32c5e471b3059be5085165 (diff) | |
isURI: don't require non-ASCII characters to be escaped.
Closes #8508.
Diffstat (limited to 'src')
| -rw-r--r-- | src/Text/Pandoc/URI.hs | 6 |
1 file changed, 4 insertions, 2 deletions
```diff
diff --git a/src/Text/Pandoc/URI.hs b/src/Text/Pandoc/URI.hs
index 345ada768..7addd2844 100644
--- a/src/Text/Pandoc/URI.hs
+++ b/src/Text/Pandoc/URI.hs
@@ -20,7 +20,7 @@ import qualified Network.HTTP.Types as HTTP
 import qualified Text.Pandoc.UTF8 as UTF8
 import qualified Data.Text as T
 import qualified Data.Set as Set
-import Data.Char (isSpace)
+import Data.Char (isSpace, isAscii)
 import Network.URI (URI (uriScheme), parseURI, escapeURIString)
 
 urlEncode :: T.Text -> T.Text
@@ -90,7 +90,9 @@ schemes = Set.fromList
 -- | Check if the string is a valid URL with a IANA or frequently used but
 -- unofficial scheme (see @schemes@).
 isURI :: T.Text -> Bool
-isURI = maybe False hasKnownScheme . parseURI . T.unpack
+isURI =
+  -- we URI-escape non-ASCII characters because otherwise parseURI will choke:
+  maybe False hasKnownScheme . parseURI . escapeURIString isAscii . T.unpack
   where
     hasKnownScheme = (`Set.member` schemes) . T.toLower .
                      T.filter (/= ':') . T.pack . uriScheme
```
