From b5e9ae3595ee27bc194138782fbc529d15681dea Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 18 Mar 2025 09:09:32 -0700
Subject: Fix parsing of base64 data URIs...

...to allow (a) URI escapes, (b) whitespace (which will be ignored).
Partially addresses #10704.
---
 src/Text/Pandoc/Class/PandocMonad.hs | 3 ++-
 src/Text/Pandoc/URI.hs               | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index b7f2d5a9b..e58429ee7 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -336,7 +336,8 @@ downloadOrRead :: PandocMonad m
                -> m (B.ByteString, Maybe MimeType)
 downloadOrRead s
   | "data:" `T.isPrefixOf` s,
-    Right (bs, mt) <- A.parseOnly pBase64DataURI s
+    Right (bs, mt) <- A.parseOnly pBase64DataURI
+                        (T.pack . unEscapeString . T.unpack $ s)
         = pure (bs, Just mt)
   | otherwise = do
   sourceURL <- getsCommonState stSourceURL
diff --git a/src/Text/Pandoc/URI.hs b/src/Text/Pandoc/URI.hs
index 09a9660d8..556654a28 100644
--- a/src/Text/Pandoc/URI.hs
+++ b/src/Text/Pandoc/URI.hs
@@ -133,7 +133,7 @@ pBase64DataURI = base64uri
       mps <- many mediaParam
       pure $ n1 <> "/" <> n2 <> mconcat mps
     A.string ";base64,"
-    b64 <- A.takeWhile (A.inClass "A-Za-z0-9+/")
+    b64 <- A.takeWhile (A.inClass "A-Za-z0-9+ \t\r\n/")
     A.skipWhile (== '=')
     -- this decode should be lazy:
     pure (decodeLenient (encodeUtf8 b64), mime)
-- 
cgit v1.2.3