diff options
| author | John MacFarlane <[email protected]> | 2023-08-18 17:50:59 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2023-08-18 17:50:59 -0700 |
| commit | 068fce4293eb139f54d4825e1dbdcaf35e34da03 (patch) | |
| tree | b8bf3cd3919a87055a13ef54367e9cb9ff4da849 /src | |
| parent | bd4de143d076fc46e3c7b6b5e95e421585780639 (diff) | |
Docx reader: omit "Table NN" from caption.
Closes #9002.
Diffstat (limited to 'src')
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 42 |
1 file changed, 25 insertions, 17 deletions
```diff
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index aebc4a5d4..71be50c18 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -86,13 +86,15 @@ import Text.Pandoc.XML.Light
       strContent,
       showElement,
       findAttr,
+      filterChild,
       filterChildrenName,
       filterElementName,
+      lookupAttrBy,
       parseXMLElement,
       elChildren,
       QName(QName, qName),
       Content(Elem),
-      Element(elContent, elName),
+      Element(..),
       findElements )
 
 data ReaderEnv = ReaderEnv { envNotes :: Notes
@@ -725,7 +727,25 @@ elemToBodyPart ns element
       parstyle <- elemToParagraphStyle ns element
                     <$> asks envParStyles
                     <*> asks envNumbering
-      parparts' <- mconcat <$> mapD (elemToParPart ns) (elChildren element)
+
+      let hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle)
+
+      let isTableNumberElt el@(Element name attribs _ _) =
+           (qName name == "fldSimple" &&
+             case lookupAttrBy ((== "instr") . qName) attribs of
+               Nothing -> False
+               Just instr -> "Table" `elem` T.words instr) ||
+           (qName name == "instrText" && "Table" `elem` T.words (strContent el))
+
+      let isTable = hasCaptionStyle &&
+                      isJust (filterChild isTableNumberElt element)
+
+      let stripOffLabel = dropWhile (not . isTableNumberElt)
+
+      let children = (if isTable
+                          then stripOffLabel
+                          else id) $ elChildren element
+      parparts' <- mconcat <$> mapD (elemToParPart ns) children
       fldCharState <- gets stateFldCharState
       modify $ \st -> st {stateFldCharState = emptyFldCharContents fldCharState}
       -- Word uses list enumeration for numbered headings, so we only
@@ -734,21 +754,9 @@ elemToBodyPart ns element
       case pHeading parstyle of
         Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
                     mkListItem parstyle numId lvl parparts
-        _ -> let
-          hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle)
-
-          hasSimpleTableField = fromMaybe False $ do
-            fldSimple <- findChildByName ns "w" "fldSimple" element
-            instr <- findAttrByName ns "w" "instr" fldSimple
-            pure ("Table" `elem` T.words instr)
-
-          hasComplexTableField = fromMaybe False $ do
-            instrText <- findElementByName ns "w" "instrText" element
-            pure ("Table" `elem` T.words (strContent instrText))
-
-          in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField)
-             then return $ TblCaption parstyle parparts
-             else return $ Paragraph parstyle parparts
+        _ -> if isTable
+                then return $ TblCaption parstyle parparts
+                else return $ Paragraph parstyle parparts
 
 elemToBodyPart ns element
   | isElem ns "w" "tbl" element = do
```
