diff options
| author | John MacFarlane <[email protected]> | 2025-03-18 23:25:39 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2025-03-18 23:25:39 -0700 |
| commit | 68bb4ae580cbd58d082e630b6344e5d0caa7fb16 (patch) | |
| tree | 6d85d2ffa8cfc9ef299dbe6183b69400b1680441 | |
| parent | 93437525a70cfd6989e6893c4c1d39c839e4ae7a (diff) | |
Handle percent encoding in pBase64DataURI.
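This improves efficiency: the parser now decodes percent-encoded octets itself, so downloadOrRead no longer has to unescape the entire data URI (via a round trip through String) before parsing it. See #10704.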
| -rw-r--r-- | src/Text/Pandoc/Class/PandocMonad.hs | 3 | ||||
| -rw-r--r-- | src/Text/Pandoc/URI.hs | 15 |
2 files changed, 13 insertions, 5 deletions
diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index e58429ee7..850aff584 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -336,8 +336,7 @@ downloadOrRead :: PandocMonad m -> m (B.ByteString, Maybe MimeType) downloadOrRead s | "data:" `T.isPrefixOf` s, - Right (bs, mt) <- A.parseOnly pBase64DataURI - (T.pack . unEscapeString . T.unpack $ s) + Right (bs, mt) <- A.parseOnly (pBase64DataURI <* A.endOfInput) s = pure (bs, Just mt) | otherwise = do sourceURL <- getsCommonState stSourceURL diff --git a/src/Text/Pandoc/URI.hs b/src/Text/Pandoc/URI.hs index 556654a28..6086cbe8b 100644 --- a/src/Text/Pandoc/URI.hs +++ b/src/Text/Pandoc/URI.hs @@ -24,11 +24,12 @@ import qualified Data.ByteString as B import qualified Text.Pandoc.UTF8 as UTF8 import qualified Data.Text as T import qualified Data.Set as Set -import Data.Char (isSpace, isAscii) +import Data.Char (isSpace, isAscii, isHexDigit, chr) +import Safe (readMay) import Network.URI (URI (uriScheme), parseURI, escapeURIString) import qualified Data.Attoparsec.Text as A import Data.Text.Encoding (encodeUtf8) -import Control.Applicative (many) +import Control.Applicative (many, (<|>)) urlEncode :: T.Text -> T.Text urlEncode = UTF8.toText . HTTP.urlEncode True . 
UTF8.fromText @@ -133,10 +134,18 @@ pBase64DataURI = base64uri mps <- many mediaParam pure $ n1 <> "/" <> n2 <> mconcat mps A.string ";base64," - b64 <- A.takeWhile (A.inClass "A-Za-z0-9+ \t\r\n/") + b64 <- mconcat <$> many + (A.takeWhile1 (A.inClass "A-Za-z0-9/+ \t\r\n") <|> percentOctet) A.skipWhile (== '=') -- this decode should be lazy: pure (decodeLenient (encodeUtf8 b64), mime) + percentOctet = do + A.char '%' + x <- A.satisfy isHexDigit + y <- A.satisfy isHexDigit + case readMay ['0','x',x,y] of + Nothing -> fail $ "Could not read percent encoded byte " <> [x,y] + Just d -> pure $ T.singleton $ chr d restrictedName = do c <- A.satisfy (A.inClass "A-Za-z0-9") rest <- A.takeWhile (A.inClass "A-Za-z0-9!#$&^_.+-") |
