aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2024-12-19 13:30:38 -0800
committerJohn MacFarlane <[email protected]>2024-12-19 13:30:38 -0800
commit449824c412d759d916b031ddc547f69a5b634b00 (patch)
tree9c2416ad8167b1432dc507c8b5f9d4c6ee87585a /src
parent3900791a0f747aa42e017152cdca67127d9f8133 (diff)
T.P.URI: pBase64DataURI now returns mime + bytes
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs2
-rw-r--r--src/Text/Pandoc/URI.hs38
2 files changed, 25 insertions, 15 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 139b853dc..b1294fae8 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1844,7 +1844,7 @@ source = do
base64DataURI :: PandocMonad m => ParsecT Sources s m Text
base64DataURI = do
Sources ((pos, txt):rest) <- getInput
- let r = A.parse pBase64DataURI txt
+ let r = A.parse (fst <$> A.match pBase64DataURI) txt
case r of
A.Done remaining consumed -> do
let pos' = incSourceColumn pos (T.length consumed)
diff --git a/src/Text/Pandoc/URI.hs b/src/Text/Pandoc/URI.hs
index 53b7288b2..09a9660d8 100644
--- a/src/Text/Pandoc/URI.hs
+++ b/src/Text/Pandoc/URI.hs
@@ -18,12 +18,16 @@ module Text.Pandoc.URI ( urlEncode
, pBase64DataURI
) where
import qualified Network.HTTP.Types as HTTP
+import Data.ByteString.Base64 (decodeLenient)
+import Text.Pandoc.MIME (MimeType)
+import qualified Data.ByteString as B
import qualified Text.Pandoc.UTF8 as UTF8
import qualified Data.Text as T
import qualified Data.Set as Set
import Data.Char (isSpace, isAscii)
import Network.URI (URI (uriScheme), parseURI, escapeURIString)
import qualified Data.Attoparsec.Text as A
+import Data.Text.Encoding (encodeUtf8)
import Control.Applicative (many)
urlEncode :: T.Text -> T.Text
@@ -117,24 +121,30 @@ uriPathToPath (T.unpack -> path) =
path
#endif
-pBase64DataURI :: A.Parser T.Text
-pBase64DataURI = fst <$> A.match base64uri
+pBase64DataURI :: A.Parser (B.ByteString, MimeType)
+pBase64DataURI = base64uri
where
base64uri = do
A.string "data:"
- restrictedName
- A.char '/'
- restrictedName
- A.char ';'
- many mediaParam
- A.string "base64,"
- A.skipWhile (A.inClass "A-Za-z0-9+/")
+ mime <- do
+ n1 <- restrictedName
+ A.char '/'
+ n2 <- restrictedName
+ mps <- many mediaParam
+ pure $ n1 <> "/" <> n2 <> mconcat mps
+ A.string ";base64,"
+ b64 <- A.takeWhile (A.inClass "A-Za-z0-9+/")
A.skipWhile (== '=')
+ -- this decode should be lazy:
+ pure (decodeLenient (encodeUtf8 b64), mime)
restrictedName = do
- A.satisfy (A.inClass "A-Za-z0-9")
- A.skipWhile (A.inClass "A-Za-z0-9!#$&^_.+-")
+ c <- A.satisfy (A.inClass "A-Za-z0-9")
+ rest <- A.takeWhile (A.inClass "A-Za-z0-9!#$&^_.+-")
+ pure $ T.singleton c <> rest
mediaParam = do
- restrictedName
- A.char '='
- A.skipWhile (/=';')
A.char ';'
+ A.skipWhile isSpace
+ k <- restrictedName
+ A.char '='
+ v <- A.takeWhile (/=';')
+ pure $ ";" <> k <> "=" <> v