diff options
| author | John MacFarlane <[email protected]> | 2024-12-19 13:30:38 -0800 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2024-12-19 13:30:38 -0800 |
| commit | 449824c412d759d916b031ddc547f69a5b634b00 (patch) | |
| tree | 9c2416ad8167b1432dc507c8b5f9d4c6ee87585a /src | |
| parent | 3900791a0f747aa42e017152cdca67127d9f8133 (diff) | |
T.P.URI: pBase64DataURI now returns mime + bytes
Diffstat (limited to 'src')
| -rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 2 | ||||
| -rw-r--r-- | src/Text/Pandoc/URI.hs | 38 |
2 files changed, 25 insertions, 15 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 139b853dc..b1294fae8 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1844,7 +1844,7 @@ source = do base64DataURI :: PandocMonad m => ParsecT Sources s m Text base64DataURI = do Sources ((pos, txt):rest) <- getInput - let r = A.parse pBase64DataURI txt + let r = A.parse (fst <$> A.match pBase64DataURI) txt case r of A.Done remaining consumed -> do let pos' = incSourceColumn pos (T.length consumed) diff --git a/src/Text/Pandoc/URI.hs b/src/Text/Pandoc/URI.hs index 53b7288b2..09a9660d8 100644 --- a/src/Text/Pandoc/URI.hs +++ b/src/Text/Pandoc/URI.hs @@ -18,12 +18,16 @@ module Text.Pandoc.URI ( urlEncode , pBase64DataURI ) where import qualified Network.HTTP.Types as HTTP +import Data.ByteString.Base64 (decodeLenient) +import Text.Pandoc.MIME (MimeType) +import qualified Data.ByteString as B import qualified Text.Pandoc.UTF8 as UTF8 import qualified Data.Text as T import qualified Data.Set as Set import Data.Char (isSpace, isAscii) import Network.URI (URI (uriScheme), parseURI, escapeURIString) import qualified Data.Attoparsec.Text as A +import Data.Text.Encoding (encodeUtf8) import Control.Applicative (many) urlEncode :: T.Text -> T.Text @@ -117,24 +121,30 @@ uriPathToPath (T.unpack -> path) = path #endif -pBase64DataURI :: A.Parser T.Text -pBase64DataURI = fst <$> A.match base64uri +pBase64DataURI :: A.Parser (B.ByteString, MimeType) +pBase64DataURI = base64uri where base64uri = do A.string "data:" - restrictedName - A.char '/' - restrictedName - A.char ';' - many mediaParam - A.string "base64," - A.skipWhile (A.inClass "A-Za-z0-9+/") + mime <- do + n1 <- restrictedName + A.char '/' + n2 <- restrictedName + mps <- many mediaParam + pure $ n1 <> "/" <> n2 <> mconcat mps + A.string ";base64," + b64 <- A.takeWhile (A.inClass "A-Za-z0-9+/") A.skipWhile (== '=') + -- this decode should be lazy: + pure (decodeLenient (encodeUtf8 b64), mime) restrictedName = do - A.satisfy (A.inClass "A-Za-z0-9") - A.skipWhile (A.inClass "A-Za-z0-9!#$&^_.+-") + c <- A.satisfy (A.inClass "A-Za-z0-9") + rest <- A.takeWhile (A.inClass "A-Za-z0-9!#$&^_.+-") + pure $ T.singleton c <> rest mediaParam = do - restrictedName - A.char '=' - A.skipWhile (/=';') A.char ';' + A.skipWhile isSpace + k <- restrictedName + A.char '=' + v <- A.takeWhile (/=';') + pure $ ";" <> k <> "=" <> v |
