aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorNoah Malmed <[email protected]>2023-04-05 15:37:22 -0400
committerGitHub <[email protected]>2023-04-05 12:37:22 -0700
commit0353e11e27c08d211cea13bbad1ad968845dc29e (patch)
tree42ffc1a8f40f011ec4b4469b482bb623be45b5b5 /src
parent9f718da38a75cc703a32c9f0818770fcfd2ac8f8 (diff)
Add rowspan, colspan and alignment to cells in jats table reader (#8726)
Partially addresses #8408
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs46
1 files changed, 30 insertions, 16 deletions
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 47cd596e8..56f341b54 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -268,26 +268,29 @@ parseBlock (Elem e) =
Just c -> filterChildren isColspec c
_ -> filterChildren isColspec e'
let isRow x = named "row" x || named "tr" x
- headrows <- case filterChild (named "thead") e' of
- Just h -> case filterChild isRow h of
- Just x -> parseRow x
- Nothing -> return []
- Nothing -> return []
- bodyrows <- case filterChild (named "tbody") e' of
- Just b -> mapM parseRow
- $ filterChildren isRow b
- Nothing -> mapM parseRow
- $ filterChildren isRow e'
+
+ -- list of header cell elements
+ let headRowElements = case filterChild (named "thead") e' of
+ Just h -> maybe [] parseElement (filterChild isRow h)
+ Nothing -> []
+ -- list of list of body cell elements
+ let bodyRowElements = case filterChild (named "tbody") e' of
+ Just b -> map parseElement $ filterChildren isRow b
+ Nothing -> map parseElement $ filterChildren isRow e'
let toAlignment c = case findAttr (unqual "align") c of
Just "left" -> AlignLeft
Just "right" -> AlignRight
Just "center" -> AlignCenter
_ -> AlignDefault
+ let toColSpan element = fromMaybe 1 $
+ findAttr (unqual "colspan") element >>= safeRead
+ let toRowSpan element = fromMaybe 1 $
+ findAttr (unqual "rowspan") element >>= safeRead
let toWidth c = do
w <- findAttr (unqual "colwidth") c
n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w
if n > 0 then Just n else Nothing
- let numrows = foldl' max 0 $ map length bodyrows
+ let numrows = foldl' max 0 $ map length bodyRowElements
let aligns = case colspecs of
[] -> replicate numrows AlignDefault
cs -> map toAlignment cs
@@ -298,15 +301,26 @@ parseBlock (Elem e) =
Just ws' -> let tot = sum ws'
in ColWidth . (/ tot) <$> ws'
Nothing -> replicate numrows ColWidthDefault
- let toRow = Row nullAttr . map simpleCell
- toHeaderRow l = [toRow l | not (null l)]
+
+ let parseCell = parseMixed plain . elContent
+ let elementToCell element = cell
+ (toAlignment element)
+ (RowSpan $ toRowSpan element)
+ (ColSpan $ toColSpan element)
+ <$> (parseCell element)
+ let rowElementsToCells elements = mapM elementToCell elements
+ let toRow = fmap (Row nullAttr) . rowElementsToCells
+ toHeaderRow element = sequence $ [toRow element | not (null element)]
+
+ headerRow <- toHeaderRow headRowElements
+ bodyRows <- mapM toRow bodyRowElements
return $ table (simpleCaption $ plain capt)
(zip aligns widths)
- (TableHead nullAttr $ toHeaderRow headrows)
- [TableBody nullAttr 0 [] $ map toRow bodyrows]
+ (TableHead nullAttr headerRow)
+ [TableBody nullAttr 0 [] bodyRows]
(TableFoot nullAttr [])
isEntry x = named "entry" x || named "td" x || named "th" x
- parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry
+ parseElement = filterChildren isEntry
sect n = do isbook <- gets jatsBook
let n' = if isbook || n == 0 then n + 1 else n
labelText <- case filterChild (named "label") e of