aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNoah Malmed <[email protected]>2023-04-05 15:37:22 -0400
committerGitHub <[email protected]>2023-04-05 12:37:22 -0700
commit0353e11e27c08d211cea13bbad1ad968845dc29e (patch)
tree42ffc1a8f40f011ec4b4469b482bb623be45b5b5
parent9f718da38a75cc703a32c9f0818770fcfd2ac8f8 (diff)
Add rowspan, colspan and alignment to cells in jats table reader (#8726)
Partially addresses #8408
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs46
-rw-r--r--test/jats-reader.native90
-rw-r--r--test/jats-reader.xml44
3 files changed, 164 insertions, 16 deletions
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index 47cd596e8..56f341b54 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -268,26 +268,29 @@ parseBlock (Elem e) =
Just c -> filterChildren isColspec c
_ -> filterChildren isColspec e'
let isRow x = named "row" x || named "tr" x
- headrows <- case filterChild (named "thead") e' of
- Just h -> case filterChild isRow h of
- Just x -> parseRow x
- Nothing -> return []
- Nothing -> return []
- bodyrows <- case filterChild (named "tbody") e' of
- Just b -> mapM parseRow
- $ filterChildren isRow b
- Nothing -> mapM parseRow
- $ filterChildren isRow e'
+
+ -- list of header cell elements
+ let headRowElements = case filterChild (named "thead") e' of
+ Just h -> maybe [] parseElement (filterChild isRow h)
+ Nothing -> []
+ -- list of list of body cell elements
+ let bodyRowElements = case filterChild (named "tbody") e' of
+ Just b -> map parseElement $ filterChildren isRow b
+ Nothing -> map parseElement $ filterChildren isRow e'
let toAlignment c = case findAttr (unqual "align") c of
Just "left" -> AlignLeft
Just "right" -> AlignRight
Just "center" -> AlignCenter
_ -> AlignDefault
+ let toColSpan element = fromMaybe 1 $
+ findAttr (unqual "colspan") element >>= safeRead
+ let toRowSpan element = fromMaybe 1 $
+ findAttr (unqual "rowspan") element >>= safeRead
let toWidth c = do
w <- findAttr (unqual "colwidth") c
n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w
if n > 0 then Just n else Nothing
- let numrows = foldl' max 0 $ map length bodyrows
+ let numrows = foldl' max 0 $ map length bodyRowElements
let aligns = case colspecs of
[] -> replicate numrows AlignDefault
cs -> map toAlignment cs
@@ -298,15 +301,26 @@ parseBlock (Elem e) =
Just ws' -> let tot = sum ws'
in ColWidth . (/ tot) <$> ws'
Nothing -> replicate numrows ColWidthDefault
- let toRow = Row nullAttr . map simpleCell
- toHeaderRow l = [toRow l | not (null l)]
+
+ let parseCell = parseMixed plain . elContent
+ let elementToCell element = cell
+ (toAlignment element)
+ (RowSpan $ toRowSpan element)
+ (ColSpan $ toColSpan element)
+ <$> (parseCell element)
+ let rowElementsToCells elements = mapM elementToCell elements
+ let toRow = fmap (Row nullAttr) . rowElementsToCells
+ toHeaderRow element = sequence $ [toRow element | not (null element)]
+
+ headerRow <- toHeaderRow headRowElements
+ bodyRows <- mapM toRow bodyRowElements
return $ table (simpleCaption $ plain capt)
(zip aligns widths)
- (TableHead nullAttr $ toHeaderRow headrows)
- [TableBody nullAttr 0 [] $ map toRow bodyrows]
+ (TableHead nullAttr headerRow)
+ [TableBody nullAttr 0 [] bodyRows]
(TableFoot nullAttr [])
isEntry x = named "entry" x || named "td" x || named "th" x
- parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry
+ parseElement = filterChildren isEntry
sect n = do isbook <- gets jatsBook
let n' = if isbook || n == 0 then n + 1 else n
labelText <- case filterChild (named "label") e of
diff --git a/test/jats-reader.native b/test/jats-reader.native
index 02439ce31..254333f57 100644
--- a/test/jats-reader.native
+++ b/test/jats-reader.native
@@ -2794,6 +2794,96 @@ Pandoc
(TableFoot ( "" , [] , [] ) [])
, Header
2
+ ( "table-with-spans-and-alignments" , [] , [] )
+ [ Str "Tables"
+ , Space
+ , Str "with"
+ , Space
+ , Str "spans"
+ , Space
+ , Str "and"
+ , Space
+ , Str "alignments"
+ ]
+ , Table
+ ( "" , [] , [] )
+ (Caption Nothing [])
+ [ ( AlignLeft , ColWidthDefault )
+ , ( AlignLeft , ColWidthDefault )
+ , ( AlignLeft , ColWidthDefault )
+ ]
+ (TableHead
+ ( "" , [] , [] )
+ [ Row
+ ( "" , [] , [] )
+ [ Cell
+ ( "" , [] , [] )
+ AlignDefault
+ (RowSpan 1)
+ (ColSpan 2)
+ [ Para [ Str "1" ] ]
+ , Cell
+ ( "" , [] , [] )
+ AlignRight
+ (RowSpan 1)
+ (ColSpan 1)
+ [ Para [ Str "2" ] ]
+ ]
+ ])
+ [ TableBody
+ ( "" , [] , [] )
+ (RowHeadColumns 0)
+ []
+ [ Row
+ ( "" , [] , [] )
+ [ Cell
+ ( "" , [] , [] )
+ AlignDefault
+ (RowSpan 1)
+ (ColSpan 2)
+ [ Para [ Str "1" ] ]
+ , Cell
+ ( "" , [] , [] )
+ AlignLeft
+ (RowSpan 1)
+ (ColSpan 1)
+ [ Para [ Str "2" ] ]
+ ]
+ , Row
+ ( "" , [] , [] )
+ [ Cell
+ ( "" , [] , [] )
+ AlignDefault
+ (RowSpan 2)
+ (ColSpan 1)
+ [ Para [ Str "4" ] ]
+ , Cell
+ ( "" , [] , [] )
+ AlignDefault
+ (RowSpan 1)
+ (ColSpan 1)
+ [ Para [ Str "5" ] ]
+ , Cell
+ ( "" , [] , [] )
+ AlignDefault
+ (RowSpan 1)
+ (ColSpan 1)
+ [ Para [ Str "6" ] ]
+ ]
+ , Row
+ ( "" , [] , [] )
+ [ Cell
+ ( "" , [] , [] )
+ AlignDefault
+ (RowSpan 1)
+ (ColSpan 2)
+ [ Para [ Str "7" ] ]
+ ]
+ ]
+ ]
+ (TableFoot ( "" , [] , [] ) [])
+ , Header
+ 2
( "empty-tables" , [] , [] )
[ Str "Empty" , Space , Str "Tables" ]
, Para
diff --git a/test/jats-reader.xml b/test/jats-reader.xml
index f33cb9ab3..6dbc2c3d9 100644
--- a/test/jats-reader.xml
+++ b/test/jats-reader.xml
@@ -1150,6 +1150,50 @@ These should not be escaped: \$ \\ \&gt; \[ \{</preformat>
</tbody>
</table>
</sec>
+ <sec id="table-with-spans-and-alignments">
+ <title>Tables with spans and alignments</title>
+ <table>
+ <col align="left" />
+ <col align="left" />
+ <col align="left" />
+ <thead>
+ <tr>
+ <td colspan="2">
+ <p>1</p>
+ </td>
+ <td align="right">
+ <p>2</p>
+ </td>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td colspan="2">
+ <p>1</p>
+ </td>
+ <td align="left">
+ <p>2</p>
+ </td>
+ </tr>
+ <tr>
+ <td rowspan="2">
+ <p>4</p>
+ </td>
+ <td>
+ <p>5</p>
+ </td>
+ <td>
+ <p>6</p>
+ </td>
+ </tr>
+ <tr>
+ <td colspan="2">
+ <p>7</p>
+ </td>
+ </tr>
+ </tbody>
+ </table>
+ </sec>
<sec id="empty-tables">
<title>Empty Tables</title>
<p>This section should be empty.</p>