diff options
| -rw-r--r-- | pandoc.cabal | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 42 | ||||
| -rw-r--r-- | test/command/9002.docx | bin | 0 -> 12631 bytes | |||
| -rw-r--r-- | test/command/9002.md | 20 | ||||
| -rw-r--r-- | test/docx/table_captions_with_field.native | 10 |
5 files changed, 50 insertions, 23 deletions
diff --git a/pandoc.cabal b/pandoc.cabal index 9a4198b43..11b8bb1d6 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -207,6 +207,7 @@ extra-source-files: test/command/*.md test/command/*.csl test/command/*.svg + test/command/9002.docx test/command/biblio.bib test/command/averroes.bib test/command/A.txt diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index aebc4a5d4..71be50c18 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -86,13 +86,15 @@ import Text.Pandoc.XML.Light strContent, showElement, findAttr, + filterChild, filterChildrenName, filterElementName, + lookupAttrBy, parseXMLElement, elChildren, QName(QName, qName), Content(Elem), - Element(elContent, elName), + Element(..), findElements ) data ReaderEnv = ReaderEnv { envNotes :: Notes @@ -725,7 +727,25 @@ elemToBodyPart ns element parstyle <- elemToParagraphStyle ns element <$> asks envParStyles <*> asks envNumbering - parparts' <- mconcat <$> mapD (elemToParPart ns) (elChildren element) + + let hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle) + + let isTableNumberElt el@(Element name attribs _ _) = + (qName name == "fldSimple" && + case lookupAttrBy ((== "instr") . qName) attribs of + Nothing -> False + Just instr -> "Table" `elem` T.words instr) || + (qName name == "instrText" && "Table" `elem` T.words (strContent el)) + + let isTable = hasCaptionStyle && + isJust (filterChild isTableNumberElt element) + + let stripOffLabel = dropWhile (not . isTableNumberElt) + + let children = (if isTable + then stripOffLabel + else id) $ elChildren element + parparts' <- mconcat <$> mapD (elemToParPart ns) children fldCharState <- gets stateFldCharState modify $ \st -> st {stateFldCharState = emptyFldCharContents fldCharState} -- Word uses list enumeration for numbered headings, so we only @@ -734,21 +754,9 @@ elemToBodyPart ns element case pHeading parstyle of Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do mkListItem parstyle numId lvl parparts - _ -> let - hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle) - - hasSimpleTableField = fromMaybe False $ do - fldSimple <- findChildByName ns "w" "fldSimple" element - instr <- findAttrByName ns "w" "instr" fldSimple - pure ("Table" `elem` T.words instr) - - hasComplexTableField = fromMaybe False $ do - instrText <- findElementByName ns "w" "instrText" element - pure ("Table" `elem` T.words (strContent instrText)) - - in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField) - then return $ TblCaption parstyle parparts - else return $ Paragraph parstyle parparts + _ -> if isTable + then return $ TblCaption parstyle parparts + else return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do diff --git a/test/command/9002.docx b/test/command/9002.docx Binary files differnew file mode 100644 index 000000000..4722c53e5 --- /dev/null +++ b/test/command/9002.docx diff --git a/test/command/9002.md b/test/command/9002.md new file mode 100644 index 000000000..23915dda1 --- /dev/null +++ b/test/command/9002.md @@ -0,0 +1,20 @@ +``` +% pandoc command/9002.docx -t html +^D +<table> +<caption><p>This is my table!</p></caption> +<colgroup> +<col style="width: 50%" /> +<col style="width: 50%" /> +</colgroup> +<thead> +<tr class="header"> +<th>a</th> +<th>b</th> +</tr> +</thead> +<tbody> +</tbody> +</table> +<p>See Table 1 This is my table!</p> +``` diff --git a/test/docx/table_captions_with_field.native b/test/docx/table_captions_with_field.native index deb8afc6b..4f81ce477 100644 --- a/test/docx/table_captions_with_field.native +++ b/test/docx/table_captions_with_field.native @@ -1,7 +1,6 @@ [Para [Str "See",Space,Str "Table",Space,Str "1."] -,Para [] -,Table ("",[],[]) (Caption Nothing - [Para [Str "Table",Space,Str "1"]]) +,Para [Str "Table",Space,Str "1"] +,Table ("",[],[]) (Caption Nothing []) [(AlignDefault,ColWidth 0.7605739372523825) ,(AlignDefault,ColWidth 0.11971303137380876) ,(AlignDefault,ColWidth 0.11971303137380876)] @@ -32,8 +31,7 @@ (TableFoot ("",[],[]) []) ,Header 2 ("section", [], []) [] -,Table ("",[],[]) (Caption Nothing - [Para [Str "Table",Space,Str "2"]]) +,Table ("",[],[]) (Caption Nothing []) [(AlignDefault,ColWidth 0.3332963620230701) ,(AlignDefault,ColWidth 0.3332963620230701) ,(AlignDefault,ColWidth 0.3334072759538598)] @@ -50,5 +48,5 @@ [])] (TableFoot ("",[],[]) []) -,Para [] +,Para [Str "Table",Space,Str "2"] ,Para [Str "See",Space,Str "Table",Space,Str "2."]] |
