diff options
| author | Albert Krewinkel <[email protected]> | 2022-08-28 10:38:12 +0200 |
|---|---|---|
| committer | Albert Krewinkel <[email protected]> | 2022-08-30 10:05:49 +0200 |
| commit | 9b6220c8ed2ae9ddcc58fb2a335c2597d79a6f85 (patch) | |
| tree | 2440d46f800638b6ae397e82da6e5e8d5216643c /src | |
| parent | 7a2e8e51a05b8930c1e8f50b62a8775f5bda088e (diff) | |
Docx reader: mark unnumbered headings with class 'unnumbered'
If a document uses numbered headings, then headings without numbers are
marked with class `unnumbered`, the default class used by pandoc to
convey this kind of information. The classes are not added if none of
the headings in a document are. This change ensures good conversion
results when converting with `--number-sections`.
Closes: #8148
Diffstat (limited to 'src')
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx.hs | 16 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Docx/Parse.hs | 75 |
2 files changed, 61 insertions, 30 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 2ac529b87..fa563f81e 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -117,6 +117,7 @@ data DState = DState { docxAnchorMap :: M.Map T.Text T.Text , docxAnchorSet :: Set.Set T.Text , docxImmedPrevAnchor :: Maybe T.Text , docxMediaBag :: MediaBag + , docxNumberedHeadings :: Bool , docxDropCap :: Inlines -- keep track of (numId, lvl) values for -- restarting @@ -131,6 +132,7 @@ instance Default DState where , docxAnchorSet = mempty , docxImmedPrevAnchor = Nothing , docxMediaBag = mempty + , docxNumberedHeadings = False , docxDropCap = mempty , docxListState = M.empty , docxPrevPara = mempty @@ -662,10 +664,17 @@ bodyPartToBlocks (Paragraph pPr parparts) T.concat $ map parPartToText parparts | Just (style, n) <- pHeading pPr = do - ils <-local (\s-> s{docxInHeaderBlock=True}) + ils <- local (\s-> s{docxInHeaderBlock=True}) (smushInlines <$> mapM parPartToInlines parparts) + let classes = map normalizeToClassName . delete style + $ getStyleNames (pStyle pPr) + + hasNumbering <- gets docxNumberedHeadings + let addNum = if hasNumbering && not (numbered pPr) + then (++ ["unnumbered"]) + else id makeHeaderAnchor $ - headerWith ("", map normalizeToClassName . delete style $ getStyleNames (pStyle pPr), []) n ils + headerWith ("", addNum classes, []) n ils | otherwise = do ils <- trimSps . smushInlines <$> mapM parPartToInlines parparts prevParaIls <- gets docxPrevPara @@ -812,6 +821,9 @@ bodyToOutput (Body bps) = do let (metabps, blkbps) = sepBodyParts bps meta <- bodyPartsToMeta metabps captions <- catMaybes <$> mapM bodyPartToTableCaption blkbps + let isNumberedPara (Paragraph pPr _) = numbered pPr + isNumberedPara _ = False + modify (\s -> s { docxNumberedHeadings = any isNumberedPara blkbps }) modify (\s -> s { docxTableCaptions = captions }) blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index af8143fac..956e8b225 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -219,6 +219,7 @@ data TrackedChange = TrackedChange ChangeType ChangeInfo data ParagraphStyle = ParagraphStyle { pStyle :: [ParStyle] , indentation :: Maybe ParIndentation + , numbered :: Bool , dropCap :: Bool , pChange :: Maybe TrackedChange , pBidi :: Maybe Bool @@ -228,6 +229,7 @@ data ParagraphStyle = ParagraphStyle { pStyle :: [ParStyle] defaultParagraphStyle :: ParagraphStyle defaultParagraphStyle = ParagraphStyle { pStyle = [] , indentation = Nothing + , numbered = False , dropCap = False , pChange = Nothing , pBidi = Just False @@ -688,6 +690,11 @@ pHeading = getParStyleField headingLev . pStyle pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text) pNumInfo = getParStyleField numInfo . pStyle +mkListItem :: ParagraphStyle -> Text -> Text -> [ParPart] -> D BodyPart +mkListItem parstyle numId lvl parparts = do + lvlInfo <- lookupLevel numId lvl <$> asks envNumbering + return $ ListItem parstyle numId lvl lvlInfo parparts + pStyleIndentation :: ParagraphStyle -> Maybe ParIndentation pStyleIndentation style = (getParStyleField indent . pStyle) style @@ -700,38 +707,43 @@ elemToBodyPart ns element elemToBodyPart ns element | isElem ns "w" "p" element , Just (numId, lvl) <- getNumInfo ns element = do - parstyle <- elemToParagraphStyle ns element <$> asks envParStyles + parstyle <- elemToParagraphStyle ns element + <$> asks envParStyles + <*> asks envNumbering parparts <- mconcat <$> mapD (elemToParPart ns) (elChildren element) - levelInfo <- lookupLevel numId lvl <$> asks envNumbering - return $ ListItem parstyle numId lvl levelInfo parparts + case pHeading parstyle of + Nothing -> mkListItem parstyle numId lvl parparts + Just _ -> do + return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "p" element = do - parstyle <- elemToParagraphStyle ns element <$> asks envParStyles + parstyle <- elemToParagraphStyle ns element + <$> asks envParStyles + <*> asks envNumbering parparts' <- mconcat <$> mapD (elemToParPart ns) (elChildren element) fldCharState <- gets stateFldCharState modify $ \st -> st {stateFldCharState = emptyFldCharContents fldCharState} -- Word uses list enumeration for numbered headings, so we only -- want to infer a list from the styles if it is NOT a heading. - let parparts = parparts' ++ (openFldCharsToParParts fldCharState) in - case pHeading parstyle of - Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do - levelInfo <- lookupLevel numId lvl <$> asks envNumbering - return $ ListItem parstyle numId lvl levelInfo parparts - _ -> let - hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle) - - hasSimpleTableField = fromMaybe False $ do - fldSimple <- findChildByName ns "w" "fldSimple" element - instr <- findAttrByName ns "w" "instr" fldSimple - pure ("Table" `elem` T.words instr) - - hasComplexTableField = fromMaybe False $ do - instrText <- findElementByName ns "w" "instrText" element - pure ("Table" `elem` T.words (strContent instrText)) - - in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField) - then return $ TblCaption parstyle parparts - else return $ Paragraph parstyle parparts + let parparts = parparts' ++ (openFldCharsToParParts fldCharState) + case pHeading parstyle of + Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do + mkListItem parstyle numId lvl parparts + _ -> let + hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle) + + hasSimpleTableField = fromMaybe False $ do + fldSimple <- findChildByName ns "w" "fldSimple" element + instr <- findAttrByName ns "w" "instr" fldSimple + pure ("Table" `elem` T.words instr) + + hasComplexTableField = fromMaybe False $ do + instrText <- findElementByName ns "w" "instrText" element + pure ("Table" `elem` T.words (strContent instrText)) + + in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField) + then return $ TblCaption parstyle parparts + else return $ Paragraph parstyle parparts elemToBodyPart ns element | isElem ns "w" "tbl" element = do @@ -1115,15 +1127,22 @@ getTrackedChange ns element Just $ TrackedChange Deletion (ChangeInfo cId cAuthor mcDate) getTrackedChange _ _ = Nothing -elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle -elemToParagraphStyle ns element sty +elemToParagraphStyle :: NameSpaces -> Element + -> ParStyleMap + -> Numbering + -> ParagraphStyle +elemToParagraphStyle ns element sty numbering | Just pPr <- findChildByName ns "w" "pPr" element = let style = mapMaybe (fmap ParaStyleId . findAttrByName ns "w" "val") (findChildrenByName ns "w" "pStyle" pPr) + pStyle' = mapMaybe (`M.lookup` sty) style in ParagraphStyle - {pStyle = mapMaybe (`M.lookup` sty) style + {pStyle = pStyle' + , numbered = case getNumInfo ns element of + Just (numId, lvl) -> isJust $ lookupLevel numId lvl numbering + Nothing -> isJust $ getParStyleField numInfo pStyle' , indentation = getIndentation ns element , dropCap = @@ -1143,7 +1162,7 @@ elemToParagraphStyle ns element sty getTrackedChange ns , pBidi = checkOnOff ns pPr (elemName ns "w" "bidi") } -elemToParagraphStyle _ _ _ = defaultParagraphStyle + | otherwise = defaultParagraphStyle elemToRunStyleD :: NameSpaces -> Element -> D RunStyle elemToRunStyleD ns element |
