aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAlbert Krewinkel <[email protected]>2022-08-28 10:38:12 +0200
committerAlbert Krewinkel <[email protected]>2022-08-30 10:05:49 +0200
commit9b6220c8ed2ae9ddcc58fb2a335c2597d79a6f85 (patch)
tree2440d46f800638b6ae397e82da6e5e8d5216643c /src
parent7a2e8e51a05b8930c1e8f50b62a8775f5bda088e (diff)
Docx reader: mark unnumbered headings with class 'unnumbered'
If a document uses numbered headings, then headings without numbers are marked with class `unnumbered`, the default class used by pandoc to convey this kind of information. The classes are not added if none of the headings in a document are. This change ensures good conversion results when converting with `--number-sections`. Closes: #8148
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs16
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs75
2 files changed, 61 insertions, 30 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 2ac529b87..fa563f81e 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -117,6 +117,7 @@ data DState = DState { docxAnchorMap :: M.Map T.Text T.Text
, docxAnchorSet :: Set.Set T.Text
, docxImmedPrevAnchor :: Maybe T.Text
, docxMediaBag :: MediaBag
+ , docxNumberedHeadings :: Bool
, docxDropCap :: Inlines
-- keep track of (numId, lvl) values for
-- restarting
@@ -131,6 +132,7 @@ instance Default DState where
, docxAnchorSet = mempty
, docxImmedPrevAnchor = Nothing
, docxMediaBag = mempty
+ , docxNumberedHeadings = False
, docxDropCap = mempty
, docxListState = M.empty
, docxPrevPara = mempty
@@ -662,10 +664,17 @@ bodyPartToBlocks (Paragraph pPr parparts)
T.concat $
map parPartToText parparts
| Just (style, n) <- pHeading pPr = do
- ils <-local (\s-> s{docxInHeaderBlock=True})
+ ils <- local (\s-> s{docxInHeaderBlock=True})
(smushInlines <$> mapM parPartToInlines parparts)
+ let classes = map normalizeToClassName . delete style
+ $ getStyleNames (pStyle pPr)
+
+ hasNumbering <- gets docxNumberedHeadings
+ let addNum = if hasNumbering && not (numbered pPr)
+ then (++ ["unnumbered"])
+ else id
makeHeaderAnchor $
- headerWith ("", map normalizeToClassName . delete style $ getStyleNames (pStyle pPr), []) n ils
+ headerWith ("", addNum classes, []) n ils
| otherwise = do
ils <- trimSps . smushInlines <$> mapM parPartToInlines parparts
prevParaIls <- gets docxPrevPara
@@ -812,6 +821,9 @@ bodyToOutput (Body bps) = do
let (metabps, blkbps) = sepBodyParts bps
meta <- bodyPartsToMeta metabps
captions <- catMaybes <$> mapM bodyPartToTableCaption blkbps
+ let isNumberedPara (Paragraph pPr _) = numbered pPr
+ isNumberedPara _ = False
+ modify (\s -> s { docxNumberedHeadings = any isNumberedPara blkbps })
modify (\s -> s { docxTableCaptions = captions })
blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index af8143fac..956e8b225 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -219,6 +219,7 @@ data TrackedChange = TrackedChange ChangeType ChangeInfo
data ParagraphStyle = ParagraphStyle { pStyle :: [ParStyle]
, indentation :: Maybe ParIndentation
+ , numbered :: Bool
, dropCap :: Bool
, pChange :: Maybe TrackedChange
, pBidi :: Maybe Bool
@@ -228,6 +229,7 @@ data ParagraphStyle = ParagraphStyle { pStyle :: [ParStyle]
defaultParagraphStyle :: ParagraphStyle
defaultParagraphStyle = ParagraphStyle { pStyle = []
, indentation = Nothing
+ , numbered = False
, dropCap = False
, pChange = Nothing
, pBidi = Just False
@@ -688,6 +690,11 @@ pHeading = getParStyleField headingLev . pStyle
pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text)
pNumInfo = getParStyleField numInfo . pStyle
+mkListItem :: ParagraphStyle -> Text -> Text -> [ParPart] -> D BodyPart
+mkListItem parstyle numId lvl parparts = do
+ lvlInfo <- lookupLevel numId lvl <$> asks envNumbering
+ return $ ListItem parstyle numId lvl lvlInfo parparts
+
pStyleIndentation :: ParagraphStyle -> Maybe ParIndentation
pStyleIndentation style = (getParStyleField indent . pStyle) style
@@ -700,38 +707,43 @@ elemToBodyPart ns element
elemToBodyPart ns element
| isElem ns "w" "p" element
, Just (numId, lvl) <- getNumInfo ns element = do
- parstyle <- elemToParagraphStyle ns element <$> asks envParStyles
+ parstyle <- elemToParagraphStyle ns element
+ <$> asks envParStyles
+ <*> asks envNumbering
parparts <- mconcat <$> mapD (elemToParPart ns) (elChildren element)
- levelInfo <- lookupLevel numId lvl <$> asks envNumbering
- return $ ListItem parstyle numId lvl levelInfo parparts
+ case pHeading parstyle of
+ Nothing -> mkListItem parstyle numId lvl parparts
+ Just _ -> do
+ return $ Paragraph parstyle parparts
elemToBodyPart ns element
| isElem ns "w" "p" element = do
- parstyle <- elemToParagraphStyle ns element <$> asks envParStyles
+ parstyle <- elemToParagraphStyle ns element
+ <$> asks envParStyles
+ <*> asks envNumbering
parparts' <- mconcat <$> mapD (elemToParPart ns) (elChildren element)
fldCharState <- gets stateFldCharState
modify $ \st -> st {stateFldCharState = emptyFldCharContents fldCharState}
-- Word uses list enumeration for numbered headings, so we only
-- want to infer a list from the styles if it is NOT a heading.
- let parparts = parparts' ++ (openFldCharsToParParts fldCharState) in
- case pHeading parstyle of
- Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
- levelInfo <- lookupLevel numId lvl <$> asks envNumbering
- return $ ListItem parstyle numId lvl levelInfo parparts
- _ -> let
- hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle)
-
- hasSimpleTableField = fromMaybe False $ do
- fldSimple <- findChildByName ns "w" "fldSimple" element
- instr <- findAttrByName ns "w" "instr" fldSimple
- pure ("Table" `elem` T.words instr)
-
- hasComplexTableField = fromMaybe False $ do
- instrText <- findElementByName ns "w" "instrText" element
- pure ("Table" `elem` T.words (strContent instrText))
-
- in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField)
- then return $ TblCaption parstyle parparts
- else return $ Paragraph parstyle parparts
+ let parparts = parparts' ++ (openFldCharsToParParts fldCharState)
+ case pHeading parstyle of
+ Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do
+ mkListItem parstyle numId lvl parparts
+ _ -> let
+ hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle)
+
+ hasSimpleTableField = fromMaybe False $ do
+ fldSimple <- findChildByName ns "w" "fldSimple" element
+ instr <- findAttrByName ns "w" "instr" fldSimple
+ pure ("Table" `elem` T.words instr)
+
+ hasComplexTableField = fromMaybe False $ do
+ instrText <- findElementByName ns "w" "instrText" element
+ pure ("Table" `elem` T.words (strContent instrText))
+
+ in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField)
+ then return $ TblCaption parstyle parparts
+ else return $ Paragraph parstyle parparts
elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
@@ -1115,15 +1127,22 @@ getTrackedChange ns element
Just $ TrackedChange Deletion (ChangeInfo cId cAuthor mcDate)
getTrackedChange _ _ = Nothing
-elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle
-elemToParagraphStyle ns element sty
+elemToParagraphStyle :: NameSpaces -> Element
+ -> ParStyleMap
+ -> Numbering
+ -> ParagraphStyle
+elemToParagraphStyle ns element sty numbering
| Just pPr <- findChildByName ns "w" "pPr" element =
let style =
mapMaybe
(fmap ParaStyleId . findAttrByName ns "w" "val")
(findChildrenByName ns "w" "pStyle" pPr)
+ pStyle' = mapMaybe (`M.lookup` sty) style
in ParagraphStyle
- {pStyle = mapMaybe (`M.lookup` sty) style
+ {pStyle = pStyle'
+ , numbered = case getNumInfo ns element of
+ Just (numId, lvl) -> isJust $ lookupLevel numId lvl numbering
+ Nothing -> isJust $ getParStyleField numInfo pStyle'
, indentation =
getIndentation ns element
, dropCap =
@@ -1143,7 +1162,7 @@ elemToParagraphStyle ns element sty
getTrackedChange ns
, pBidi = checkOnOff ns pPr (elemName ns "w" "bidi")
}
-elemToParagraphStyle _ _ _ = defaultParagraphStyle
+ | otherwise = defaultParagraphStyle
elemToRunStyleD :: NameSpaces -> Element -> D RunStyle
elemToRunStyleD ns element