From a5576317c9d99ef641e10a68e1fce6656dd26054 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 12 Nov 2025 12:03:13 +0100 Subject: Docx reader: check recursively for caption styles. The docx reader uses caption styles to identify figures and captioned tables. It now checks for known caption styles in the full style hierarchy of a paragraph instead of just checking the style directly. This makes it possible to recognize caption styles that are built on top of the basic *caption* style, as is sometimes the case in sophisticated styles. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 5d54d70a8..c2ad9e837 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1361,14 +1361,20 @@ findBlip el = do -- return svg if present: filterElementName (\(QName tag _ _) -> tag == "svgBlip") el `mplus` pure blip +-- | Checks if any style in the style hierarchy is a caption style. hasCaptionStyle :: ParagraphStyle -> Bool -hasCaptionStyle parstyle = any (isCaptionStyleName . pStyleName) (pStyle parstyle) +hasCaptionStyle = + any (isCaptionStyleName . pStyleName) . concatMap nestedStyles . pStyle where -- note that these are case insensitive: isCaptionStyleName "caption" = True isCaptionStyleName "table caption" = True isCaptionStyleName "image caption" = True isCaptionStyleName _ = False + -- Gets all the style names in the style hierarchy + nestedStyles :: ParStyle -> [ParStyle] + nestedStyles ps = ps : maybe [] nestedStyles (psParentStyle ps) + stripCaptionLabel :: [Element] -> [Element] stripCaptionLabel els = if any isNumberElt els -- cgit v1.2.3