aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2021-12-30 18:25:16 -0800
committerJohn MacFarlane <[email protected]>2021-12-30 21:26:30 -0800
commitcc30d646cae917efa3187a9a812908510e9543a2 (patch)
tree5d03123d8c064b6cd269dd2589a043cb1f9f714f /src
parent4ff997bf687df689347975a6a129e98a8e7d28b7 (diff)
Docx reader: change elemToParPart to return [ParPart]
...instead of ParPart. Also remove NullParPart constructor, as it is no longer needed. This will allow us to handle elements that contain multiple ParParts, e.g. w:drawing elements with multiple pic:pic. See #7786.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs1
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs68
2 files changed, 34 insertions, 35 deletions
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index f328fef27..0fa72035d 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -450,7 +450,6 @@ parPartToInlines' (Field info children) =
HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url children
PagerefField fieldAnchor True -> parPartToInlines' $ InternalHyperLink fieldAnchor children
_ -> smushInlines <$> mapM parPartToInlines' children
-parPartToInlines' NullParPart = return mempty
isAnchorSpan :: Inline -> Bool
isAnchorSpan (Span (_, ["anchor"], []) _) = True
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 87a3aebef..58aa6fb71 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -320,8 +320,6 @@ data ParPart = PlainRun Run
| Diagram -- placeholder for now
| PlainOMath [Exp]
| Field FieldInfo [ParPart]
- | NullParPart -- when we need to return nothing, but
- -- not because of an error.
deriving Show
data Run = Run RunStyle [RunElem]
@@ -694,13 +692,13 @@ elemToBodyPart ns element
| isElem ns "w" "p" element
, Just (numId, lvl) <- getNumInfo ns element = do
parstyle <- elemToParagraphStyle ns element <$> asks envParStyles
- parparts <- mapD (elemToParPart ns) (elChildren element)
+ parparts <- mconcat <$> mapD (elemToParPart ns) (elChildren element)
levelInfo <- lookupLevel numId lvl <$> asks envNumbering
return $ ListItem parstyle numId lvl levelInfo parparts
elemToBodyPart ns element
| isElem ns "w" "p" element = do
parstyle <- elemToParagraphStyle ns element <$> asks envParStyles
- parparts' <- mapD (elemToParPart ns) (elChildren element)
+ parparts' <- mconcat <$> mapD (elemToParPart ns) (elChildren element)
fldCharState <- gets stateFldCharState
modify $ \st -> st {stateFldCharState = emptyFldCharContents fldCharState}
-- Word uses list enumeration for numbered headings, so we only
@@ -792,7 +790,7 @@ getTitleAndAlt ns element =
alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
in (title, alt)
-elemToParPart :: NameSpaces -> Element -> D ParPart
+elemToParPart :: NameSpaces -> Element -> D [ParPart]
{-
The next one is a bit complicated. fldChar fields work by first
having a <w:fldChar fldCharType="begin"> in a run, then a run with
@@ -840,21 +838,21 @@ elemToParPart ns element
case fldCharState of
_ | fldCharType == "begin" -> do
modify $ \st -> st {stateFldCharState = FldCharOpen : fldCharState}
- return NullParPart
+ return []
FldCharFieldInfo info : ancestors | fldCharType == "separate" -> do
modify $ \st -> st {stateFldCharState = FldCharContent info [] : ancestors}
- return NullParPart
+ return []
-- Some fields have no content, since Pandoc doesn't understand any of those fields, we can just close it.
FldCharFieldInfo _ : ancestors | fldCharType == "end" -> do
modify $ \st -> st {stateFldCharState = ancestors}
- return NullParPart
+ return []
[FldCharContent info children] | fldCharType == "end" -> do
modify $ \st -> st {stateFldCharState = []}
- return $ Field info $ reverse children
+ return [Field info $ reverse children]
FldCharContent info children : FldCharContent parentInfo siblings : ancestors | fldCharType == "end" ->
let parent = FldCharContent parentInfo $ (Field info (reverse children)) : siblings in do
modify $ \st -> st {stateFldCharState = parent : ancestors}
- return NullParPart
+ return []
_ -> throwError WrongElem
elemToParPart ns element
| isElem ns "w" "r" element
@@ -864,8 +862,8 @@ elemToParPart ns element
FldCharOpen : ancestors -> do
info <- eitherToD $ parseFieldInfo $ strContent instrText
modify $ \st -> st {stateFldCharState = FldCharFieldInfo info : ancestors}
- return NullParPart
- _ -> return NullParPart
+ return []
+ _ -> return []
{-
There is an open fldchar, so we calculate the element and add it to the
children. For this we need to first change the fldchar state to an empty
@@ -878,12 +876,12 @@ elemToParPart ns element = do
case fldCharState of
FldCharContent info children : ancestors -> do
modify $ \st -> st {stateFldCharState = []}
- parPart <- elemToParPart' ns element `catchError` \_ -> return NullParPart
- modify $ \st -> st{stateFldCharState = FldCharContent info (parPart : children) : ancestors}
- return NullParPart
+ parParts <- elemToParPart' ns element `catchError` \_ -> return []
+ modify $ \st -> st{stateFldCharState = FldCharContent info (parParts ++ children) : ancestors}
+ return []
_ -> elemToParPart' ns element
-elemToParPart' :: NameSpaces -> Element -> D ParPart
+elemToParPart' :: NameSpaces -> Element -> D [ParPart]
elemToParPart' ns element
| isElem ns "w" "r" element
, Just drawingElem <- findChildByName ns "w" "drawing" element
@@ -895,7 +893,8 @@ elemToParPart' ns element
>>= findAttrByName ns "r" "embed"
in
case drawing of
- Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
+ Just s -> expandDrawingId s >>= \(fp, bs) ->
+ return [Drawing fp title alt bs (elemToExtent drawingElem)]
Nothing -> throwError WrongElem
-- The two cases below are an attempt to deal with images in deprecated vml format.
-- Todo: check out title and attr for deprecated format.
@@ -906,7 +905,7 @@ elemToParPart' ns element
>>= findAttrByName ns "r" "id"
in
case drawing of
- Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
+ Just s -> expandDrawingId s >>= (\(fp, bs) -> return [Drawing fp "" "" bs Nothing])
Nothing -> throwError WrongElem
elemToParPart' ns element
| isElem ns "w" "r" element
@@ -914,51 +913,52 @@ elemToParPart' ns element
, Just shapeElem <- findChildByName ns "v" "shape" objectElem
, Just imagedataElem <- findChildByName ns "v" "imagedata" shapeElem
, Just drawingId <- findAttrByName ns "r" "id" imagedataElem
- = expandDrawingId drawingId >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing)
+ = expandDrawingId drawingId >>= (\(fp, bs) -> return [Drawing fp "" "" bs Nothing])
-- Diagram
elemToParPart' ns element
| isElem ns "w" "r" element
, Just drawingElem <- findChildByName ns "w" "drawing" element
, d_ns <- "http://schemas.openxmlformats.org/drawingml/2006/diagram"
, Just _ <- findElement (QName "relIds" (Just d_ns) (Just "dgm")) drawingElem
- = return Diagram
+ = return [Diagram]
-- Chart
elemToParPart' ns element
| isElem ns "w" "r" element
, Just drawingElem <- findChildByName ns "w" "drawing" element
, c_ns <- "http://schemas.openxmlformats.org/drawingml/2006/chart"
, Just _ <- findElement (QName "chart" (Just c_ns) (Just "c")) drawingElem
- = return Chart
+ = return [Chart]
elemToParPart' ns element
| isElem ns "w" "r" element = do
run <- elemToRun ns element
- return $ PlainRun run
+ return [PlainRun run]
elemToParPart' ns element
| Just change <- getTrackedChange ns element = do
runs <- mapD (elemToRun ns) (elChildren element)
- return $ ChangedRuns change runs
+ return [ChangedRuns change runs]
elemToParPart' ns element
| isElem ns "w" "bookmarkStart" element
, Just bmId <- findAttrByName ns "w" "id" element
, Just bmName <- findAttrByName ns "w" "name" element =
- return $ BookMark bmId bmName
+ return [BookMark bmId bmName]
elemToParPart' ns element
| isElem ns "w" "hyperlink" element
, Just relId <- findAttrByName ns "r" "id" element = do
location <- asks envLocation
- children <- mapD (elemToParPart ns) (elChildren element)
+ children <- mconcat <$> mapD (elemToParPart ns) (elChildren element)
rels <- asks envRelationships
case lookupRelationship location relId rels of
Just target ->
case findAttrByName ns "w" "anchor" element of
- Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) children
- Nothing -> return $ ExternalHyperLink target children
- Nothing -> return $ ExternalHyperLink "" children
+ Just anchor -> return
+ [ExternalHyperLink (target <> "#" <> anchor) children]
+ Nothing -> return [ExternalHyperLink target children]
+ Nothing -> return [ExternalHyperLink "" children]
elemToParPart' ns element
| isElem ns "w" "hyperlink" element
, Just anchor <- findAttrByName ns "w" "anchor" element = do
- children <- mapD (elemToParPart ns) (elChildren element)
- return $ InternalHyperLink anchor children
+ children <- mconcat <$> mapD (elemToParPart ns) (elChildren element)
+ return [InternalHyperLink anchor children]
elemToParPart' ns element
| isElem ns "w" "commentRangeStart" element
, Just cmtId <- findAttrByName ns "w" "id" element = do
@@ -969,20 +969,20 @@ elemToParPart' ns element
elemToParPart' ns element
| isElem ns "w" "commentRangeEnd" element
, Just cmtId <- findAttrByName ns "w" "id" element =
- return $ CommentEnd cmtId
+ return [CommentEnd cmtId]
elemToParPart' ns element
| isElem ns "m" "oMath" element =
- fmap PlainOMath (eitherToD $ readOMML $ showElement element)
+ fmap (return . PlainOMath) (eitherToD $ readOMML $ showElement element)
elemToParPart' _ _ = throwError WrongElem
-elemToCommentStart :: NameSpaces -> Element -> D ParPart
+elemToCommentStart :: NameSpaces -> Element -> D [ParPart]
elemToCommentStart ns element
| isElem ns "w" "comment" element
, Just cmtId <- findAttrByName ns "w" "id" element
, Just cmtAuthor <- findAttrByName ns "w" "author" element
, cmtDate <- findAttrByName ns "w" "date" element = do
bps <- mapD (elemToBodyPart ns) (elChildren element)
- return $ CommentStart cmtId cmtAuthor cmtDate bps
+ return [CommentStart cmtId cmtAuthor cmtDate bps]
elemToCommentStart _ _ = throwError WrongElem
lookupFootnote :: T.Text -> Notes -> Maybe Element