aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2025-01-10 10:28:57 -0800
committerJohn MacFarlane <[email protected]>2025-01-10 10:30:08 -0800
commit6051d62e56f4c64857ac5d6b2d6fab9390f6ea5a (patch)
treeba081a305d3f99d176fafe35fccf934bf6c77ee6
parentb108cef9dd7dfc7dbc7a61f5416c1449e10eedbd (diff)
Docx reader: read table styles as custom styles...
...when `styles` extension is enabled. Closes #9603. Also improve manual's coverage of custom styles.
-rw-r--r--MANUAL.txt30
-rw-r--r--pandoc.cabal1
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs10
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs7
-rw-r--r--test/command/9603.docxbin0 -> 13799 bytes
-rw-r--r--test/command/9603.md23
6 files changed, 53 insertions, 18 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index 6ec998db9..7a7899319 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -3753,11 +3753,14 @@ output formats
### Extension: `styles` ### {#ext-styles}
-When converting from docx, read all docx styles as divs (for
-paragraph styles) and spans (for character styles) regardless
-of whether pandoc understands the meaning of these styles.
-This can be used with [docx custom styles](#custom-styles).
-Disabled by default.
+When converting from docx, add `custom-style` attributes
+for all docx styles, regardless of whether pandoc understands
+the meanings of these styles. Because attributes cannot be
+added directly to paragraphs or text in the pandoc AST,
+paragraph styles will cause Divs to be created and character
+styles will cause Spans to be created to hold the attributes.
+(Table styles will be added to the Table elements directly.)
+This extension can be used with [docx custom styles](#custom-styles).
input formats
: `docx`
@@ -7354,11 +7357,11 @@ However, if you need to apply your own styles to blocks, or match a preexisting
set of styles, pandoc allows you to define custom styles for blocks and text
using `div`s and `span`s, respectively.
-If you define a `div` or `span` with the attribute `custom-style`,
-pandoc will apply your specified style to the contained elements (with
-the exception of elements whose function depends on a style, like
-headings, code blocks, block quotes, or links). So, for example, using
-the `bracketed_spans` syntax,
+If you define a Div, Span, or Table with the attribute
+`custom-style`, pandoc will apply your specified style to the
+contained elements (with the exception of elements whose function
+depends on a style, like headings, code blocks, block quotes, or
+links). So, for example, using the `bracketed_spans` syntax,
[Get out]{custom-style="Emphatically"}, he said.
@@ -7400,9 +7403,10 @@ interpreting the derivation of the input document's styles.
By enabling the [`styles` extension](#ext-styles) in the docx reader
(`-f docx+styles`), you can produce output that maintains the styles
-of the input document, using the `custom-style` class. Paragraph
-styles are interpreted as divs, while character styles are interpreted
-as spans.
+of the input document, using the `custom-style` attribute. A `custom-style`
+attribute will be added for each style. Divs will be created to
+hold the paragraph styles, and Spans to hold the character styles.
+Table styles will be applied directly to the Table.
For example, using the `custom-style-reference.docx` file in the test
directory, we have the following different outputs:
diff --git a/pandoc.cabal b/pandoc.cabal
index 08e863812..fdaa5caf2 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -219,6 +219,7 @@ extra-source-files:
test/command/9391.docx
test/command/9358.docx
test/command/9002.docx
+ test/command/9603.docx
test/command/biblio.bib
test/command/averroes.bib
test/command/A.txt
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 7abbc461d..efe104af7 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -807,9 +807,9 @@ bodyPartToBlocks (Captioned parstyle parparts bpart) = do
[Para im@[Image{}]]
-> pure $ singleton $ Figure nullAttr capt [Plain im]
_ -> pure captContents
-bodyPartToBlocks (Tbl _ _ _ []) =
+bodyPartToBlocks (Tbl _ _ _ _ []) =
return mempty
-bodyPartToBlocks (Tbl cap grid look parts) = do
+bodyPartToBlocks (Tbl mbsty cap grid look parts) = do
let fullCaption = if T.null cap then mempty else plain (text cap)
let shortCaption = if T.null cap then Nothing else Just (toList (text cap))
cap' = caption shortCaption fullCaption
@@ -831,7 +831,11 @@ bodyPartToBlocks (Tbl cap grid look parts) = do
totalWidth = sum grid
widths = (\w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid
- return $ table cap'
+ extStylesEnabled <- asks (isEnabled Ext_styles . docxOptions)
+ let attr = case mbsty of
+ Just sty | extStylesEnabled -> ("", [], [("custom-style", sty)])
+ _ -> nullAttr
+ return $ tableWith attr cap'
(zip alignments widths)
(TableHead nullAttr headerCells)
[TableBody nullAttr 0 [] bodyCells]
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 20e510d8d..99ffcaf09 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -287,7 +287,7 @@ data BodyPart = Paragraph ParagraphStyle [ParPart]
| Heading Int ParaStyleName ParagraphStyle T.Text T.Text (Maybe Level)
[ParPart]
| ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart]
- | Tbl T.Text TblGrid TblLook [Row]
+ | Tbl (Maybe T.Text) T.Text TblGrid TblLook [Row]
| Captioned ParagraphStyle [ParPart] BodyPart
| HRule
deriving Show
@@ -855,6 +855,9 @@ elemToBodyPart ns element
description = fromMaybe "" $ tblProperties
>>= findChildByName ns "w" "tblDescription"
>>= findAttrByName ns "w" "val"
+ mbstyle = tblProperties
+ >>= findChildByName ns "w" "tblStyle"
+ >>= findAttrByName ns "w" "val"
grid' = case findChildByName ns "w" "tblGrid" element of
Just g -> elemToTblGrid ns g
Nothing -> return []
@@ -867,7 +870,7 @@ elemToBodyPart ns element
grid <- grid'
tblLook <- tblLook'
rows <- mapD (elemToRow ns) (elChildren element)
- return $ Tbl (caption <> description) grid tblLook rows
+ return $ Tbl mbstyle (caption <> description) grid tblLook rows
elemToBodyPart _ _ = throwError WrongElem
lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target
diff --git a/test/command/9603.docx b/test/command/9603.docx
new file mode 100644
index 000000000..7d70a1249
--- /dev/null
+++ b/test/command/9603.docx
Binary files differ
diff --git a/test/command/9603.md b/test/command/9603.md
new file mode 100644
index 000000000..fda2a777d
--- /dev/null
+++ b/test/command/9603.md
@@ -0,0 +1,23 @@
+```
+% pandoc command/9603.docx -t html -f docx+styles
+^D
+<p>A table with a contemporary style:</p>
+<table data-custom-style="TableContemporary">
+<colgroup>
+<col style="width: 50%" />
+<col style="width: 50%" />
+</colgroup>
+<thead>
+<tr>
+<th>Test</th>
+<th>Column</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>1</td>
+<td>2</td>
+</tr>
+</tbody>
+</table>
+```