diff options
| author | John MacFarlane <[email protected]> | 2026-01-18 19:24:02 +0100 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2026-01-18 19:24:02 +0100 |
| commit | 1eab54eb92ba89e4e98aad2b54bc154d5c5cbc69 (patch) | |
| tree | 7475c452eab82a6d01cfeac95a9e89aac6ebddc4 | |
| parent | 363135f65e2d6d5751acf427839130f672ead778 (diff) | |
Allow --extract-media to extract to a tar archive...
instead of a directory. This happens when the path given has a
.tar extension.
| -rw-r--r-- | MANUAL.txt | 6 | ||||
| -rw-r--r-- | pandoc.cabal | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/Class/IO.hs | 25 |
3 files changed, 27 insertions, 5 deletions
diff --git a/MANUAL.txt b/MANUAL.txt index 28d4aa160..fe8e9a3ae 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -736,7 +736,7 @@ header when requesting a document from a URL: affected paragraph break. This option only affects the docx reader. -`--extract-media=`*DIR* +`--extract-media=`*DIR*|*FILE*`.tar` : Extract images and other media contained in or linked from the source document to the path *DIR*, creating it if @@ -748,6 +748,10 @@ header when requesting a document from a URL: Otherwise filenames are constructed from the SHA1 hash of the contents. + If the path given ends in `.tar`, then instead of creating + a directory, pandoc will create a tar archive with the media + files. + `--abbreviations=`*FILE* : Specifies a custom abbreviations file, with abbreviations diff --git a/pandoc.cabal b/pandoc.cabal index 962123d1d..97b50ea42 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -568,6 +568,7 @@ library vector >= 0.12 && < 0.14, djot >= 0.1.2.4 && < 0.2, asciidoc >= 0.1 && < 0.2, + tar >= 0.7 && < 0.8 if !os(windows) build-depends: unix >= 2.4 && < 2.9 diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index 612ab7f51..c1bbff714 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -62,7 +62,7 @@ import qualified Data.CaseInsensitive as CI #endif import Network.URI (URI(..), parseURI, unEscapeString) import System.Directory (createDirectoryIfMissing) -import System.FilePath ((</>), takeDirectory, normalise) +import System.FilePath ((</>), takeDirectory, takeFileName, normalise, takeExtension) import qualified System.FilePath.Posix as Posix import System.IO (stderr) import System.IO.Error @@ -88,6 +88,8 @@ import qualified System.Environment as Env import qualified System.FilePath.Glob import qualified System.Random import qualified Text.Pandoc.UTF8 as UTF8 +import Codec.Archive.Tar (write) +import Codec.Archive.Tar.Entry (fileEntry, toTarPath) #ifndef EMBED_DATA_FILES import qualified Paths_pandoc as Paths #endif @@ 
-245,16 +247,31 @@ alertIndent (l:ls) = do where go l' = do UTF8.hPutStr stderr " " UTF8.hPutStrLn stderr l' --- | Extract media from the mediabag into a directory. +-- | Extract media from the mediabag into a directory (or a tar archive if the +-- path supplied ends in @.tar@). extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc -extractMedia dir d = do +extractMedia path d = do media <- getMediaBag let items = mediaItems media + let (dir, mbTar) = case takeExtension path of + ".tar" -> (takeDirectory path, Just (takeFileName path)) + _ -> (path, Nothing) if null items then return d else do - mapM_ (writeMedia dir) items + case mbTar of + Just fname -> case write <$> traverse toEntry items of + Left e -> throwError + (PandocSomeError (T.pack + ("Could not create " <> path <> ":\n" <> e))) + Right tar -> writeMedia dir + (fname, "application/x-tar", tar) + Nothing -> mapM_ (writeMedia dir) items return $ walk (adjustImagePath dir media) d + where + toEntry (fp, _mime, content) = do + tarPath <- toTarPath False fp -- False = file, not directory + pure $ fileEntry tarPath content -- | Write the contents of a media bag to a path. -- If the path contains URI escape sequences (percent-encoding), |
