about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author John MacFarlane <[email protected]> 2025-03-18 23:25:39 -0700
committer John MacFarlane <[email protected]> 2025-03-18 23:25:39 -0700
commit 68bb4ae580cbd58d082e630b6344e5d0caa7fb16 (patch)
tree 6d85d2ffa8cfc9ef299dbe6183b69400b1680441 /src
parent 93437525a70cfd6989e6893c4c1d39c839e4ae7a (diff)
Handle percent encoding in pBase64URI.
This improves efficiency. See #10704.
Diffstat (limited to 'src')
-rw-r--r-- src/Text/Pandoc/Class/PandocMonad.hs 3
-rw-r--r-- src/Text/Pandoc/URI.hs 15
2 files changed, 13 insertions, 5 deletions
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs
index e58429ee7..850aff584 100644
--- a/src/Text/Pandoc/Class/PandocMonad.hs
+++ b/src/Text/Pandoc/Class/PandocMonad.hs
@@ -336,8 +336,7 @@ downloadOrRead :: PandocMonad m
-> m (B.ByteString, Maybe MimeType)
downloadOrRead s
| "data:" `T.isPrefixOf` s,
- Right (bs, mt) <- A.parseOnly pBase64DataURI
- (T.pack . unEscapeString . T.unpack $ s)
+ Right (bs, mt) <- A.parseOnly (pBase64DataURI <* A.endOfInput) s
= pure (bs, Just mt)
| otherwise = do
sourceURL <- getsCommonState stSourceURL
diff --git a/src/Text/Pandoc/URI.hs b/src/Text/Pandoc/URI.hs
index 556654a28..6086cbe8b 100644
--- a/src/Text/Pandoc/URI.hs
+++ b/src/Text/Pandoc/URI.hs
@@ -24,11 +24,12 @@ import qualified Data.ByteString as B
import qualified Text.Pandoc.UTF8 as UTF8
import qualified Data.Text as T
import qualified Data.Set as Set
-import Data.Char (isSpace, isAscii)
+import Data.Char (isSpace, isAscii, isHexDigit, chr)
+import Safe (readMay)
import Network.URI (URI (uriScheme), parseURI, escapeURIString)
import qualified Data.Attoparsec.Text as A
import Data.Text.Encoding (encodeUtf8)
-import Control.Applicative (many)
+import Control.Applicative (many, (<|>))
urlEncode :: T.Text -> T.Text
urlEncode = UTF8.toText . HTTP.urlEncode True . UTF8.fromText
@@ -133,10 +134,18 @@ pBase64DataURI = base64uri
mps <- many mediaParam
pure $ n1 <> "/" <> n2 <> mconcat mps
A.string ";base64,"
- b64 <- A.takeWhile (A.inClass "A-Za-z0-9+ \t\r\n/")
+ b64 <- mconcat <$> many
+ (A.takeWhile1 (A.inClass "A-Za-z0-9/+ \t\r\n") <|> percentOctet)
A.skipWhile (== '=')
-- this decode should be lazy:
pure (decodeLenient (encodeUtf8 b64), mime)
+ percentOctet = do
+ A.char '%'
+ x <- A.satisfy isHexDigit
+ y <- A.satisfy isHexDigit
+ case readMay ['0','x',x,y] of
+ Nothing -> fail $ "Could not read percent encoded byte " <> [x,y]
+ Just d -> pure $ T.singleton $ chr d
restrictedName = do
c <- A.satisfy (A.inClass "A-Za-z0-9")
rest <- A.takeWhile (A.inClass "A-Za-z0-9!#$&^_.+-")