diff options
| author | Noah Malmed <[email protected]> | 2023-04-05 15:37:22 -0400 |
|---|---|---|
| committer | GitHub <[email protected]> | 2023-04-05 12:37:22 -0700 |
| commit | 0353e11e27c08d211cea13bbad1ad968845dc29e (patch) | |
| tree | 42ffc1a8f40f011ec4b4469b482bb623be45b5b5 | |
| parent | 9f718da38a75cc703a32c9f0818770fcfd2ac8f8 (diff) | |
Add rowspan, colspan and alignment to cells in jats table reader (#8726)
Partially addresses #8408
| -rw-r--r-- | src/Text/Pandoc/Readers/JATS.hs | 46 | ||||
| -rw-r--r-- | test/jats-reader.native | 90 | ||||
| -rw-r--r-- | test/jats-reader.xml | 44 |
3 files changed, 164 insertions, 16 deletions
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index 47cd596e8..56f341b54 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -268,26 +268,29 @@ parseBlock (Elem e) = Just c -> filterChildren isColspec c _ -> filterChildren isColspec e' let isRow x = named "row" x || named "tr" x - headrows <- case filterChild (named "thead") e' of - Just h -> case filterChild isRow h of - Just x -> parseRow x - Nothing -> return [] - Nothing -> return [] - bodyrows <- case filterChild (named "tbody") e' of - Just b -> mapM parseRow - $ filterChildren isRow b - Nothing -> mapM parseRow - $ filterChildren isRow e' + + -- list of header cell elements + let headRowElements = case filterChild (named "thead") e' of + Just h -> maybe [] parseElement (filterChild isRow h) + Nothing -> [] + -- list of list of body cell elements + let bodyRowElements = case filterChild (named "tbody") e' of + Just b -> map parseElement $ filterChildren isRow b + Nothing -> map parseElement $ filterChildren isRow e' let toAlignment c = case findAttr (unqual "align") c of Just "left" -> AlignLeft Just "right" -> AlignRight Just "center" -> AlignCenter _ -> AlignDefault + let toColSpan element = fromMaybe 1 $ + findAttr (unqual "colspan") element >>= safeRead + let toRowSpan element = fromMaybe 1 $ + findAttr (unqual "rowspan") element >>= safeRead let toWidth c = do w <- findAttr (unqual "colwidth") c n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w if n > 0 then Just n else Nothing - let numrows = foldl' max 0 $ map length bodyrows + let numrows = foldl' max 0 $ map length bodyRowElements let aligns = case colspecs of [] -> replicate numrows AlignDefault cs -> map toAlignment cs @@ -298,15 +301,26 @@ parseBlock (Elem e) = Just ws' -> let tot = sum ws' in ColWidth . (/ tot) <$> ws' Nothing -> replicate numrows ColWidthDefault - let toRow = Row nullAttr . map simpleCell - toHeaderRow l = [toRow l | not (null l)] + + let parseCell = parseMixed plain . elContent + let elementToCell element = cell + (toAlignment element) + (RowSpan $ toRowSpan element) + (ColSpan $ toColSpan element) + <$> (parseCell element) + let rowElementsToCells elements = mapM elementToCell elements + let toRow = fmap (Row nullAttr) . rowElementsToCells + toHeaderRow element = sequence $ [toRow element | not (null element)] + + headerRow <- toHeaderRow headRowElements + bodyRows <- mapM toRow bodyRowElements return $ table (simpleCaption $ plain capt) (zip aligns widths) - (TableHead nullAttr $ toHeaderRow headrows) - [TableBody nullAttr 0 [] $ map toRow bodyrows] + (TableHead nullAttr headerRow) + [TableBody nullAttr 0 [] bodyRows] (TableFoot nullAttr []) isEntry x = named "entry" x || named "td" x || named "th" x - parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry + parseElement = filterChildren isEntry sect n = do isbook <- gets jatsBook let n' = if isbook || n == 0 then n + 1 else n labelText <- case filterChild (named "label") e of diff --git a/test/jats-reader.native b/test/jats-reader.native index 02439ce31..254333f57 100644 --- a/test/jats-reader.native +++ b/test/jats-reader.native @@ -2794,6 +2794,96 @@ Pandoc (TableFoot ( "" , [] , [] ) []) , Header 2 + ( "table-with-spans-and-alignments" , [] , [] ) + [ Str "Tables" + , Space + , Str "with" + , Space + , Str "spans" + , Space + , Str "and" + , Space + , Str "alignments" + ] + , Table + ( "" , [] , [] ) + (Caption Nothing []) + [ ( AlignLeft , ColWidthDefault ) + , ( AlignLeft , ColWidthDefault ) + , ( AlignLeft , ColWidthDefault ) + ] + (TableHead + ( "" , [] , [] ) + [ Row + ( "" , [] , [] ) + [ Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 2) + [ Para [ Str "1" ] ] + , Cell + ( "" , [] , [] ) + AlignRight + (RowSpan 1) + (ColSpan 1) + [ Para [ Str "2" ] ] + ] + ]) + [ TableBody + ( "" , [] , [] ) + (RowHeadColumns 0) + [] + [ Row + ( "" , [] , [] ) + [ Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 2) + [ Para [ Str "1" ] ] + , Cell + ( "" , [] , [] ) + AlignLeft + (RowSpan 1) + (ColSpan 1) + [ Para [ Str "2" ] ] + ] + , Row + ( "" , [] , [] ) + [ Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 2) + (ColSpan 1) + [ Para [ Str "4" ] ] + , Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 1) + [ Para [ Str "5" ] ] + , Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 1) + [ Para [ Str "6" ] ] + ] + , Row + ( "" , [] , [] ) + [ Cell + ( "" , [] , [] ) + AlignDefault + (RowSpan 1) + (ColSpan 2) + [ Para [ Str "7" ] ] + ] + ] + ] + (TableFoot ( "" , [] , [] ) []) + , Header + 2 ( "empty-tables" , [] , [] ) [ Str "Empty" , Space , Str "Tables" ] , Para diff --git a/test/jats-reader.xml b/test/jats-reader.xml index f33cb9ab3..6dbc2c3d9 100644 --- a/test/jats-reader.xml +++ b/test/jats-reader.xml @@ -1150,6 +1150,50 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> </tbody> </table> </sec> + <sec id="table-with-spans-and-alignments"> + <title>Tables with spans and alignments</title> + <table> + <col align="left" /> + <col align="left" /> + <col align="left" /> + <thead> + <tr> + <td colspan="2"> + <p>1</p> + </td> + <td align="right"> + <p>2</p> + </td> + </tr> + </thead> + <tbody> + <tr> + <td colspan="2"> + <p>1</p> + </td> + <td align="left"> + <p>2</p> + </td> + </tr> + <tr> + <td rowspan="2"> + <p>4</p> + </td> + <td> + <p>5</p> + </td> + <td> + <p>6</p> + </td> + </tr> + <tr> + <td colspan="2"> + <p>7</p> + </td> + </tr> + </tbody> + </table> + </sec> <sec id="empty-tables"> <title>Empty Tables</title> <p>This section should be empty.</p> |
