about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: John MacFarlane <[email protected]> 2025-03-18 09:09:32 -0700
committer: John MacFarlane <[email protected]> 2025-03-18 09:10:59 -0700
commit: b5e9ae3595ee27bc194138782fbc529d15681dea (patch)
tree: 5cc43213f713756e1825ffcb04bab2b300fa8bba
parent: 3acd8859bd59b865631707f785207b43cda8314f (diff)
Fix parsing of base64 data URIs to allow (a) URI escapes and (b) whitespace (which is ignored).
Partially addresses #10704.
-rw-r--r--  src/Text/Pandoc/Class/PandocMonad.hs | 3
-rw-r--r--  src/Text/Pandoc/URI.hs | 2
2 files changed, 3 insertions, 2 deletions
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index b7f2d5a9b..e58429ee7 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -336,7 +336,8 @@ downloadOrRead :: PandocMonad m
-> m (B.ByteString, Maybe MimeType)
downloadOrRead s
| "data:" `T.isPrefixOf` s,
- Right (bs, mt) <- A.parseOnly pBase64DataURI s
+ Right (bs, mt) <- A.parseOnly pBase64DataURI
+ (T.pack . unEscapeString . T.unpack $ s)
= pure (bs, Just mt)
| otherwise = do
sourceURL <- getsCommonState stSourceURL
diff --git a/src/Text/Pandoc/URI.hs b/src/Text/Pandoc/URI.hs
index 09a9660d8..556654a28 100644
--- a/src/Text/Pandoc/URI.hs
+++ b/src/Text/Pandoc/URI.hs
@@ -133,7 +133,7 @@ pBase64DataURI = base64uri
mps <- many mediaParam
pure $ n1 <> "/" <> n2 <> mconcat mps
A.string ";base64,"
- b64 <- A.takeWhile (A.inClass "A-Za-z0-9+/")
+ b64 <- A.takeWhile (A.inClass "A-Za-z0-9+ \t\r\n/")
A.skipWhile (== '=')
-- this decode should be lazy:
pure (decodeLenient (encodeUtf8 b64), mime)