From c0b66602c2177ef871a11cf8c29d49062c612072 Mon Sep 17 00:00:00 2001 From: You Jiangbin <30883834+Harryoung@users.noreply.github.com> Date: Wed, 7 Jan 2026 18:21:06 +0800 Subject: Fix docx writer: skip directory entries when building media overrides (#11379) Pandoc's docx writer was previously adding an `<Override>` for `/word/media/` in `[Content_Types].xml` when the reference doc contains media, which violates OPC rules and causes Word to report corruption. --- src/Text/Pandoc/Writers/Docx.hs | 3 ++- test/Tests/Writers/Docx.hs | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 1a4bc4ca2..44dd168be 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -297,7 +297,8 @@ writeDocx opts doc = do map mkImageOverride imgs ++ [ mkMediaOverride (eRelativePath e) | e <- zEntries refArchive - , "word/media/" `isPrefixOf` eRelativePath e ] + , "word/media/" `isPrefixOf` eRelativePath e + , not ("/" `isSuffixOf` eRelativePath e) ] let mkDefaultNode (ext, mt) = mknode "Default" [("Extension",ext),("ContentType",mt)] () diff --git a/test/Tests/Writers/Docx.hs b/test/Tests/Writers/Docx.hs index 6494f4226..9c1aec89a 100644 --- a/test/Tests/Writers/Docx.hs +++ b/test/Tests/Writers/Docx.hs @@ -1,10 +1,15 @@ module Tests.Writers.Docx (tests) where -import Text.Pandoc +import Codec.Archive.Zip (findEntryByPath, fromEntry, toArchive) +import Data.List (isPrefixOf) +import Data.Text (Text) +import qualified Data.Text as Text +import qualified Data.Text.IO as T import Test.Tasty -import Tests.Writers.OOXML import Test.Tasty.HUnit -import Data.List (isPrefixOf) +import Tests.Writers.OOXML +import Text.Pandoc +import Text.XML.Light (QName(QName), findAttr, findElements, parseXMLDoc) -- we add an extra check to make sure that we're not writing in the -- toplevel docx directory. 
We don't want to accidentally overwrite an @@ -214,4 +219,29 @@ tests = [ testGroup "inlines" "docx/document-properties-short-desc.native" "docx/golden/document-properties-short-desc.docx" ] + , testGroup "reference docx" + [ testCase "no media directory override in content types" $ do + let opts = def{ writerReferenceDoc = Just "docx/inline_images.docx" } + txt <- T.readFile "docx/inline_formatting.native" + bs <- runIOorExplode $ do + mblang <- toLang (Just (Text.pack "en-US") :: Maybe Text) + maybe (return ()) setTranslations mblang + setVerbosity ERROR + readNative def txt >>= writeDocx opts + let archive = toArchive bs + entry <- case findEntryByPath "[Content_Types].xml" archive of + Nothing -> assertFailure "Missing [Content_Types].xml in output docx" + Just e -> return e + doc <- case parseXMLDoc (fromEntry entry) of + Nothing -> assertFailure "Failed to parse [Content_Types].xml" + Just d -> return d + let partNameAttr = QName "PartName" Nothing Nothing + let overrideName = QName "Override" Nothing Nothing + let overrides = findElements overrideName doc + let hasBadOverride = + any (\el -> findAttr partNameAttr el == Just "/word/media/") + overrides + assertBool "Found invalid /word/media/ Override in [Content_Types].xml" + (not hasBadOverride) + ] ] -- cgit v1.2.3