diff options
| author | Albert Krewinkel <[email protected]> | 2022-07-30 17:56:44 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2022-07-30 08:56:44 -0700 |
| commit | c015c35a8a7026dd7ee3207053319e886fa637c2 (patch) | |
| tree | 0f24979fd42d380650c949e2b33bda76d8cbb8df /src | |
| parent | de5620b04df540d11efdf96fb142b63b5ec4d3c5 (diff) | |
Support rowspans and colspans in grid tables (#8202)
* Add tests for zero-width and fullwidth chars in grid tables
* T.P.Parsing: simplify `gridTableWith'`, `gridTableWith` [API Change]
The functions `gridTableWith` and `gridTableWith'` no longer takes a
boolean argument that toggles whether a table head should be parsed:
both, tables with heads and without heads, are always accepted now.
* Support colspans, rowspans, and multirow headers in grid tables.
Grid tables in Markdown, reStructuredText, and Org can now contain cells
spanning over multiple columns and/or multiple rows; table headers
containing multiple rows are supported as well.
Note: the markdown writer does not yet support these more complex grid
table features.
Diffstat (limited to 'src')
| -rw-r--r-- | src/Text/Pandoc/Parsing/GridTable.hs | 164 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 8 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Org/Blocks.hs | 2 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 13 |
4 files changed, 70 insertions, 117 deletions
diff --git a/src/Text/Pandoc/Parsing/GridTable.hs b/src/Text/Pandoc/Parsing/GridTable.hs index 9db91fba7..58ab1494b 100644 --- a/src/Text/Pandoc/Parsing/GridTable.hs +++ b/src/Text/Pandoc/Parsing/GridTable.hs @@ -23,8 +23,7 @@ module Text.Pandoc.Parsing.GridTable ) where -import Control.Monad (guard) -import Data.List (transpose) +import Data.Array (elems) import Data.Text (Text) import Safe (lastDef) import Text.Pandoc.Options (ReaderOptions (readerColumns)) @@ -33,12 +32,11 @@ import Text.Pandoc.Definition import Text.Pandoc.Parsing.Capabilities import Text.Pandoc.Parsing.General import Text.Pandoc.Parsing.Types -import Text.Pandoc.Shared (compactify, splitTextByIndices, trim, trimr) import Text.Pandoc.Sources -import Text.Parsec - ( Stream (..), many1, notFollowedBy, option, optional, sepEndBy1, try ) +import Text.Parsec (Stream (..), optional, sepEndBy1, try) import qualified Data.Text as T +import qualified Text.GridTable as GT import qualified Text.Pandoc.Builder as B -- | Collection of components making up a Table block. @@ -106,11 +104,9 @@ data TableNormalization -- line). gridTableWith :: (Monad m, Monad mf, HasLastStrPosition st, HasReaderOptions st) => ParserT Sources st m (mf Blocks) -- ^ Block list parser - -> Bool -- ^ Headerless table -> ParserT Sources st m (mf Blocks) -gridTableWith blocks headless = - tableWith (gridTableHeader headless blocks) (gridTableRow blocks) - (gridTableSep '-') gridTableFooter +gridTableWith blocks = fmap tableFromComponents <$> + gridTableWith' NoNormalization blocks -- | Like @'gridTableWith'@, but returns 'TableComponents' instead of a -- Table. @@ -118,97 +114,46 @@ gridTableWith' :: (Monad m, Monad mf, HasReaderOptions st, HasLastStrPosition st) => TableNormalization -> ParserT Sources st m (mf Blocks) -- ^ Block list parser - -> Bool -- ^ Headerless table -> ParserT Sources st m (mf TableComponents) -gridTableWith' normalization blocks headless = - tableWith' normalization - (gridTableHeader headless blocks) (gridTableRow blocks) - (gridTableSep '-') gridTableFooter - -gridTableSplitLine :: [Int] -> Text -> [Text] -gridTableSplitLine indices line = map removeFinalBar $ tail $ - splitTextByIndices (init indices) $ trimr line - --- | Parses a grid segment, where the grid line is made up from the --- given char and terminated with a plus (@+@). The grid line may begin --- and/or end with a colon, signaling column alignment. Returns the size --- of the grid part and column alignment -gridPart :: Monad m => Char -> ParserT Sources st m (Int, Alignment) -gridPart ch = do - leftColon <- option False (True <$ char ':') - dashes <- many1 (char ch) - rightColon <- option False (True <$ char ':') - char '+' - let lengthDashes = length dashes + (if leftColon then 1 else 0) + - (if rightColon then 1 else 0) - let alignment = case (leftColon, rightColon) of - (True, True) -> AlignCenter - (True, False) -> AlignLeft - (False, True) -> AlignRight - (False, False) -> AlignDefault - return (lengthDashes + 1, alignment) - -gridDashedLines :: Monad m - => Char -> ParserT Sources st m [(Int, Alignment)] -gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline - -removeFinalBar :: Text -> Text -removeFinalBar = T.dropWhileEnd go . T.dropWhileEnd (=='|') - where - go c = T.any (== c) " \t" - --- | Separator between rows of grid table. -gridTableSep :: Monad m => Char -> ParserT Sources st m Char -gridTableSep ch = try $ gridDashedLines ch >> return '\n' - --- | Parse header for a grid table. -gridTableHeader :: (Monad m, Monad mf, HasLastStrPosition st) - => Bool -- ^ Headerless table - -> ParserT Sources st m (mf Blocks) - -> ParserT Sources st m (mf [Blocks], [Alignment], [Int]) -gridTableHeader True _ = do - optional blanklines - dashes <- gridDashedLines '-' - let aligns = map snd dashes - let lines' = map fst dashes - let indices = scanl (+) 0 lines' - return (return [], aligns, indices) -gridTableHeader False blocks = try $ do - optional blanklines - dashes <- gridDashedLines '-' - rawContent <- many1 (notFollowedBy (gridTableSep '=') >> char '|' >> - T.pack <$> many1Till anyChar newline) - underDashes <- gridDashedLines '=' - guard $ length dashes == length underDashes - let lines' = map fst underDashes - let indices = scanl (+) 0 lines' - let aligns = map snd underDashes - let rawHeads = map (T.unlines . map trim) $ transpose - $ map (gridTableSplitLine indices) rawContent - heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads - return (heads, aligns, indices) - -gridTableRawLine :: (Stream s m Char, UpdateSourcePos s Char) - => [Int] -> ParserT s st m [Text] -gridTableRawLine indices = do - char '|' - line <- many1Till anyChar newline - return (gridTableSplitLine indices $ T.pack line) - --- | Parse row of grid table. -gridTableRow :: (Monad m, Monad mf, HasLastStrPosition st) - => ParserT Sources st m (mf Blocks) - -> [Int] - -> ParserT Sources st m (mf [Blocks]) -gridTableRow blocks indices = do - colLines <- many1 (gridTableRawLine indices) - let cols = map ((<> "\n") . T.unlines . removeOneLeadingSpace) $ - transpose colLines - compactifyCell bs = case compactify [bs] of - [] -> mempty - x:_ -> x - cells <- sequence <$> mapM (parseFromString' blocks) cols - return $ fmap (map compactifyCell) cells +gridTableWith' normalization blocks = do + tbl <- GT.gridTable <* optional blanklines + let blkTbl = GT.mapCells + (\lns -> parseFromString' blocks + . flip T.snoc '\n' -- ensure proper block parsing + . T.unlines + . removeOneLeadingSpace + $ map T.stripEnd lns) + tbl + let rows = GT.rows blkTbl + let toPandocCell (GT.Cell c (GT.RowSpan rs) (GT.ColSpan cs)) = + fmap (B.cell AlignDefault (B.RowSpan rs) (B.ColSpan cs) . plainify) <$> c + rows' <- mapM (mapM toPandocCell) rows + columns <- getOption readerColumns + let colspecs = zipWith (\cs w -> (convAlign $ fst cs, B.ColWidth w)) + (elems $ GT.arrayTableColSpecs tbl) + (fractionalColumnWidths tbl columns) + let caption = B.emptyCaption + return $ do + rows'' <- mapM sequence rows' + let (hRows, bRows) = + splitAt (maybe 0 GT.fromRowIndex $ GT.arrayTableHead tbl) + (map (B.Row B.nullAttr) rows'') + let thead = B.TableHead B.nullAttr $ case (hRows, normalization) of + -- normalize header if necessary: remove header if it contains + -- only a single row in which all cells are empty. + ([hrow], NormalizeHeader) -> + let Row _attr cells = hrow + simple = \case + Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [] -> + True + _ -> + False + in [B.Row nullAttr cells | not (null cells) && + not (all simple cells)] + _ -> hRows + let tfoot = B.TableFoot B.nullAttr [] + let tbody = B.TableBody B.nullAttr 0 [] bRows + return $ TableComponents nullAttr caption colspecs thead [tbody] tfoot removeOneLeadingSpace :: [Text] -> [Text] removeOneLeadingSpace xs = @@ -219,10 +164,23 @@ removeOneLeadingSpace xs = Nothing -> True Just (c, _) -> c == ' ' --- | Parse footer for a grid table. -gridTableFooter :: (Stream s m Char, UpdateSourcePos s Char) - => ParserT s st m () -gridTableFooter = optional blanklines +plainify :: B.Blocks -> B.Blocks +plainify blks = case B.toList blks of + [Para x] -> B.fromList [Plain x] + _ -> blks + +convAlign :: GT.Alignment -> B.Alignment +convAlign GT.AlignLeft = B.AlignLeft +convAlign GT.AlignRight = B.AlignRight +convAlign GT.AlignCenter = B.AlignCenter +convAlign GT.AlignDefault = B.AlignDefault + +fractionalColumnWidths :: GT.ArrayTable a -> Int -> [Double] +fractionalColumnWidths gt charColumns = + let widths = map ((+1) . snd) $ -- include width of separator + (elems $ GT.arrayTableColSpecs gt) + norm = fromIntegral $ max (sum widths + length widths - 2) charColumns + in map (\w -> fromIntegral w / norm) widths --- diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 227d6e791..b09a511bf 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1362,9 +1362,9 @@ multilineTableHeader headless = try $ do -- (which may be grid), then the rows, -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -gridTable :: PandocMonad m => Bool -- ^ Headerless table - -> MarkdownParser m (F TableComponents) -gridTable headless = gridTableWith' NormalizeHeader parseBlocks headless +gridTable :: PandocMonad m + => MarkdownParser m (F TableComponents) +gridTable = gridTableWith' NormalizeHeader parseBlocks pipeBreak :: PandocMonad m => MarkdownParser m ([Alignment], [Int]) pipeBreak = try $ do @@ -1466,7 +1466,7 @@ table = try $ do (guardEnabled Ext_multiline_tables >> try (multilineTable True)) <|> (guardEnabled Ext_grid_tables >> - try (gridTable False <|> gridTable True)) <?> "table" + try gridTable) <?> "table" optional blanklines caption <- case frontCaption of Nothing -> option (return mempty) tableCaption diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 03cfdfaed..1a97a07b4 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -624,7 +624,7 @@ data OrgTable = OrgTable table :: PandocMonad m => OrgParser m (F Blocks) table = do withTables <- getExportSetting exportWithTables - tbl <- gridTableWith blocks True <|> orgTable + tbl <- gridTableWith blocks <|> orgTable return $ if withTables then tbl else mempty -- | A normal org table diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index b87c0ab71..0b824ad33 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1252,9 +1252,6 @@ headerBlock = do -- - multiline support -- - ensure that rightmost column span does not need to reach end -- - require at least 2 columns --- --- Grid tables TODO: --- - column spans dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int) dashedLine ch = do @@ -1344,14 +1341,12 @@ simpleTable headless = do rewidth = fmap $ fmap $ const ColWidthDefault gridTable :: PandocMonad m - => Bool -- ^ Headerless table - -> RSTParser m Blocks -gridTable headerless = runIdentity <$> - gridTableWith (Identity <$> parseBlocks) headerless + => RSTParser m Blocks +gridTable = runIdentity <$> + gridTableWith (Identity <$> parseBlocks) table :: PandocMonad m => RSTParser m Blocks -table = gridTable False <|> simpleTable False <|> - gridTable True <|> simpleTable True <?> "table" +table = gridTable <|> simpleTable False <|> simpleTable True <?> "table" -- -- inline |
