about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: John MacFarlane <[email protected]> 2023-01-05 21:14:02 -0800
committer: John MacFarlane <[email protected]> 2023-01-05 21:14:02 -0800
commit: 0d891afab65b6928ace302d1a110ea9303c4abfb (patch)
tree: a32f0e0c624e8b5f1f56f7a66b150a273823505c /src
parent: 7a82686adcf6efd68b32c5e471b3059be5085165 (diff)
isURI: don't require non-ASCII characters to be escaped.
Closes #8508.
Diffstat (limited to 'src')
-rw-r--r-- src/Text/Pandoc/URI.hs 6
1 files changed, 4 insertions, 2 deletions
diff --git a/src/Text/Pandoc/URI.hs b/src/Text/Pandoc/URI.hs
index 345ada768..7addd2844 100644
--- a/src/Text/Pandoc/URI.hs
+++ b/src/Text/Pandoc/URI.hs
@@ -20,7 +20,7 @@ import qualified Network.HTTP.Types as HTTP
import qualified Text.Pandoc.UTF8 as UTF8
import qualified Data.Text as T
import qualified Data.Set as Set
-import Data.Char (isSpace)
+import Data.Char (isSpace, isAscii)
import Network.URI (URI (uriScheme), parseURI, escapeURIString)
urlEncode :: T.Text -> T.Text
@@ -90,7 +90,9 @@ schemes = Set.fromList
-- | Check if the string is a valid URL with a IANA or frequently used but
-- unofficial scheme (see @schemes@).
isURI :: T.Text -> Bool
-isURI = maybe False hasKnownScheme . parseURI . T.unpack
+isURI =
+ -- we URI-escape non-ASCII characters because otherwise parseURI will choke:
+ maybe False hasKnownScheme . parseURI . escapeURIString isAscii . T.unpack
where
hasKnownScheme = (`Set.member` schemes) . T.toLower .
T.filter (/= ':') . T.pack . uriScheme