aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2025-09-17 14:18:08 +0200
committerJohn MacFarlane <[email protected]>2025-09-17 14:24:33 +0200
commitd820620ccd519e1793c63845bbf0a4d31c01976e (patch)
tree5fd721d5bc6224b81b9980d63d22243afaf77f39 /src
parentbbd7b60432be3f4ff0e37c2e3e33ed0121a9ecd3 (diff)
Docx reader: properly calculate table column widths.
Previously we assumed that every table took up the full text width. Now we read the text width from the document's sectPr. Closes #9837. Closes #11147.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs3
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs36
2 files changed, 32 insertions, 7 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 5c1859b13..b00dbd247 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -833,8 +833,7 @@ bodyPartToBlocks (Tbl mbsty cap grid look parts) = do
alignments = case rows of
[] -> replicate width Pandoc.AlignDefault
Docx.Row _ cs : _ -> concatMap getAlignment cs
- totalWidth = sum grid
- widths = (\w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid
+ widths = map ColWidth grid
extStylesEnabled <- asks (isEnabled Ext_styles . docxOptions)
let attr = case mbsty of
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 826f723f6..08c380bc1 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -110,6 +110,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
, envParStyles :: ParStyleMap
, envLocation :: DocumentLocation
, envDocXmlPath :: FilePath
+ , envTextWidth :: Int
}
deriving Show
@@ -272,7 +273,7 @@ data BodyPart = Paragraph ParagraphStyle [ParPart]
| HRule
deriving Show
-type TblGrid = [Integer]
+type TblGrid = [Double]
newtype TblLook = TblLook {firstRowFormatting::Bool}
deriving Show
@@ -403,6 +404,7 @@ archiveToDocxWithWarnings archive = do
rels = archiveToRelationships archive docXmlPath
media = filteredFilesFromArchive archive filePathIsMedia
(styles, parstyles) = archiveToStyles archive
+ textWidth = archiveToTextWidth archive
rEnv = ReaderEnv { envNotes = notes
, envComments = comments
, envNumbering = numbering
@@ -413,6 +415,7 @@ archiveToDocxWithWarnings archive = do
, envParStyles = parstyles
, envLocation = InDocument
, envDocXmlPath = docXmlPath
+ , envTextWidth = fromMaybe 9638 textWidth
}
rState = ReaderState { stateWarnings = []
, stateFldCharState = []
@@ -636,6 +639,20 @@ archiveToNumbering :: Archive -> Numbering
archiveToNumbering archive =
fromMaybe (Numbering mempty [] []) (archiveToNumbering' archive)
+archiveToTextWidth :: Archive -> Maybe Int
+archiveToTextWidth zf = do
+ entry <- findEntryByPath "word/document.xml" zf
+ docElem <- parseXMLFromEntry entry
+ let ns = elemToNameSpaces docElem
+ sectElem <- findChildByName ns "w" "body" docElem >>= findChildByName ns "w" "sectPr"
+ pgWidth <- findChildByName ns "w" "pgSz" sectElem
+ >>= findAttrByName ns "w" "w" >>= safeRead
+ pgMar <- findChildByName ns "w" "pgMar" sectElem
+ leftMargin <- findAttrByName ns "w" "left" pgMar >>= safeRead
+ rightMargin <- findAttrByName ns "w" "right" pgMar >>= safeRead
+ gutter <- findAttrByName ns "w" "gutter" pgMar >>= safeRead
+ return $ pgWidth - (leftMargin + rightMargin + gutter)
+
elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element)
elemToNotes ns notetype element
| isElem ns "w" (notetype <> "s") element =
@@ -664,11 +681,20 @@ elemToComments _ _ = M.empty
---------------------------------------------
elemToTblGrid :: NameSpaces -> Element -> D TblGrid
-elemToTblGrid ns element | isElem ns "w" "tblGrid" element =
+elemToTblGrid ns element | isElem ns "w" "tblGrid" element = do
let cols = findChildrenByName ns "w" "gridCol" element
- in
- mapD (\e -> maybeToD (findAttrByName ns "w" "w" e >>= stringToInteger))
- cols
+ textWidth <- asks envTextWidth
+ -- space between cols is 10 twips, so we subtract this:
+ let totalWidth = textWidth - (10 * (length cols - 1))
+ let toFraction :: Int -> Double
+ toFraction x = fromIntegral x / fromIntegral totalWidth
+ let normalizeFractions xs =
+ case sum xs of
+ tot | tot > 1.0 -> map (/ tot) xs
+ _ -> xs
+ normalizeFractions <$>
+ mapD (\e -> maybeToD (findAttrByName ns "w" "w" e >>=
+ fmap toFraction . safeRead)) cols
elemToTblGrid _ _ = throwError WrongElem
elemToTblLook :: NameSpaces -> Element -> D TblLook