aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2023-07-20 09:26:38 -0700
committerJohn MacFarlane <[email protected]>2023-07-20 11:51:28 -0700
commiteddedbfc14916aa06fc01ff04b38aeb30ae2e625 (patch)
treee9e8f62d6df0f071b82854b07355e31592eff48d /src
parentdb2594aed4320423a13d2e1714279a9a801fb5f3 (diff)
Fix new variant of the vulnerability in CVE-2023-35936.
Guilhem Moulin noticed that the fix to CVE-2023-35936 was incomplete. An attacker could get around it by double-encoding the malicious extension to create or override arbitrary files. $ echo '![](data://image/png;base64,cHJpbnQgImhlbGxvIgo=;.lua+%252f%252e%252e%252f%252e%252e%252fb%252elua)' >b.md $ .cabal/bin/pandoc b.md --extract-media=bar <p><img src="bar/2a0eaa89f43fada3e6c577beea4f2f8f53ab6a1d.lua+%2f%2e%2e%2f%2e%2e%2fb%2elua" /></p> $ cat b.lua print "hello" $ find bar bar/ bar/2a0eaa89f43fada3e6c577beea4f2f8f53ab6a1d.lua+ This commit adds a test case for this more complex attack and fixes the vulnerability. (The fix is quite simple: if the URL-unescaped filename or extension contains a '%', we just use the sha1 hash of the contents as the canonical name, just as we do if the filename contains '..'.)
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Class/IO.hs2
-rw-r--r--src/Text/Pandoc/MediaBag.hs7
2 files changed, 6 insertions, 3 deletions
diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs
index 86ed83c89..2ae3b5cee 100644
--- a/src/Text/Pandoc/Class/IO.hs
+++ b/src/Text/Pandoc/Class/IO.hs
@@ -224,6 +224,8 @@ writeMedia :: (PandocMonad m, MonadIO m)
-> m ()
writeMedia dir (fp, _mt, bs) = do
-- we normalize to get proper path separators for the platform
+ -- we unescape URI encoding, but given how insertMedia
+ -- is written, we shouldn't have any % in a canonical media name...
let fullpath = normalise $ dir </> unEscapeString fp
liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
report $ Extracting (T.pack fullpath)
diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs
index bb75f4591..18a40a6dc 100644
--- a/src/Text/Pandoc/MediaBag.hs
+++ b/src/Text/Pandoc/MediaBag.hs
@@ -90,16 +90,17 @@ insertMedia fp mbMime contents (MediaBag mediamap) =
&& Windows.isRelative fp''
&& isNothing uri
&& not (".." `isInfixOf` fp'')
+ && '%' `notElem` fp''
then fp''
- else showDigest (sha1 contents) <> "." <> ext
+ else showDigest (sha1 contents) <> ext
fallback = case takeExtension fp'' of
".gz" -> getMimeTypeDef $ dropExtension fp''
_ -> getMimeTypeDef fp''
mt = fromMaybe fallback mbMime
path = maybe fp'' (unEscapeString . uriPath) uri
ext = case takeExtension path of
- '.':e -> e
- _ -> maybe "" T.unpack $ extensionFromMimeType mt
+ '.':e | '%' `notElem` e -> '.':e
+ _ -> maybe "" (\x -> '.':T.unpack x) $ extensionFromMimeType mt
-- | Lookup a media item in a 'MediaBag', returning mime type and contents.
lookupMedia :: FilePath