RST Reader: Add col spans for simple tables

[API change] Text.Pandoc.Parsing: New functions `tableWithSpans`, `tableWithSpans'`, `toTableComponentsWithSpans` and `toTableComponentsWithSpans'` take a list of lists of (Blocks, RowSpan, ColSpan) to parse a Table with different RowSpan and ColSpan values accordingly. New helper functions `singleRowSpans` and `singleColumnSpans` help set all RowSpans or ColSpans to be 1 in case the table format only allows setting one or the other.
author: Tuong Nguyen Manh <[email protected]> 2025-09-06 18:20:32 +0200
committer: John MacFarlane <[email protected]> 2025-09-09 10:27:18 +0200
commit: 47e1cc46c67e937f7202d98de1d9bdd857f7b454 (patch)
tree: 0cec278c9f9339d94766576157da635e96f84035 /src/Text
parent: ff12c20f20067c55f8f55bb66e710b1d8910ad3a (diff)
3 files changed, 175 insertions, 32 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 67fcc363d..ddd409d0b 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -61,6 +61,8 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
                              lineBlockLines,
                              tableWith,
                              tableWith',
+                             tableWithSpans,
+                             tableWithSpans',
                              widthsFromIndices,
                              gridTableWith,
                              gridTableWith',
@@ -68,6 +70,10 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
                              TableNormalization (..),
                              toTableComponents,
                              toTableComponents',
+                             toTableComponentsWithSpans,
+                             toTableComponentsWithSpans',
+                             singleRowSpans,
+                             singleColumnSpans,
                              readWith,
                              readWithM,
                              testStringWith,
@@ -303,8 +309,14 @@ import Text.Pandoc.Parsing.GridTable
       gridTableWith',
       tableWith,
       tableWith',
+      tableWithSpans,
+      tableWithSpans',
       toTableComponents,
       toTableComponents',
+      toTableComponentsWithSpans,
+      toTableComponentsWithSpans',
+      singleRowSpans,
+      singleColumnSpans,
       widthsFromIndices,
       TableComponents(..),
       TableNormalization(..) )
diff --git a/src/Text/Pandoc/Parsing/GridTable.hs b/src/Text/Pandoc/Parsing/GridTable.hs
index aa4a063a0..43871ef0f 100644
--- a/src/Text/Pandoc/Parsing/GridTable.hs
+++ b/src/Text/Pandoc/Parsing/GridTable.hs
@@ -14,12 +14,18 @@ module Text.Pandoc.Parsing.GridTable
   , gridTableWith'
   , tableWith
   , tableWith'
+  , tableWithSpans
+  , tableWithSpans'
   , widthsFromIndices
     -- * Components of a plain-text table
   , TableComponents (..)
   , TableNormalization (..)
   , toTableComponents
   , toTableComponents'
+  , toTableComponentsWithSpans
+  , toTableComponentsWithSpans'
+  , singleRowSpans
+  , singleColumnSpans
   )
 where
 
@@ -65,6 +71,19 @@ toTableComponents' :: TableNormalization
                    -> [Alignment] -> [Double] -> [[Blocks]] -> [[Blocks]]
                    -> TableComponents
 toTableComponents' normalization aligns widths heads rows =
+  toTableComponentsWithSpans' normalization aligns widths (singleSpansBlocks heads) (singleSpansBlocks rows)
+
+-- | Bundles basic table components with span information into a single value.
+toTableComponentsWithSpans :: [Alignment] -> [Double] -> [[(Blocks, RowSpan, ColSpan)]] -> [[(Blocks, RowSpan, ColSpan)]]
+                           -> TableComponents
+toTableComponentsWithSpans = toTableComponentsWithSpans' NoNormalization
+
+-- | Bundles basic table components with span information into a single value,
+-- performing normalizations as necessary.
+toTableComponentsWithSpans' :: TableNormalization
+                            -> [Alignment] -> [Double] -> [[(Blocks, RowSpan, ColSpan)]] -> [[(Blocks, RowSpan, ColSpan)]]
+                            -> TableComponents
+toTableComponentsWithSpans' normalization aligns widths heads rows =
   let th = TableHead nullAttr (mapMaybe (toHeaderRow normalization) heads)
       tb = TableBody nullAttr 0 [] (map toRow rows)
       tf = TableFoot nullAttr []
@@ -208,7 +227,31 @@ tableWith' :: (Stream s m Char, UpdateSourcePos s Char,
            -> ParsecT s st m sep                       -- ^ line parser
            -> ParsecT s st m end                       -- ^ footer parser
            -> ParsecT s st m (mf TableComponents)
-tableWith' n11n headerParser rowParser lineParser footerParser = try $ do
+tableWith' n11n headerParser rowParser lineParser footerParser =
+  tableWithSpans' n11n headerParser' rowParser' lineParser footerParser
+    where
+      headerParser' = fmap (\(heads, aligns, indices) -> (fmap singleSpansBlocks heads, aligns, indices)) headerParser
+      rowParser' indices = fmap (\row -> zip3 row (repeat 1) (repeat 1)) <$> rowParser indices
+
+tableWithSpans :: (Stream s m Char, UpdateSourcePos s Char,
+              HasReaderOptions st, Monad mf)
+               => ParsecT s st m (mf [[(Blocks, RowSpan, ColSpan)]], [Alignment], [Int]) -- ^ header parser
+               -> ([Int] -> ParsecT s st m (mf [(Blocks, RowSpan, ColSpan)]))  -- ^ row parser
+               -> ParsecT s st m sep                       -- ^ line parser
+               -> ParsecT s st m end                       -- ^ footer parser
+               -> ParsecT s st m (mf Blocks)
+tableWithSpans hp rp lp fp = fmap tableFromComponents <$>
+  tableWithSpans' NoNormalization hp rp lp fp
+
+tableWithSpans' :: (Stream s m Char, UpdateSourcePos s Char,
+               HasReaderOptions st, Monad mf)
+                => TableNormalization
+                -> ParsecT s st m (mf [[(Blocks, RowSpan, ColSpan)]], [Alignment], [Int]) -- ^ header parser
+                -> ([Int] -> ParsecT s st m (mf [(Blocks, RowSpan, ColSpan)]))  -- ^ row parser
+                -> ParsecT s st m sep                       -- ^ line parser
+                -> ParsecT s st m end                       -- ^ footer parser
+                -> ParsecT s st m (mf TableComponents)
+tableWithSpans' n11n headerParser rowParser lineParser footerParser = try $ do
   (heads, aligns, indices) <- headerParser
   lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser
   footerParser
@@ -216,15 +259,17 @@ tableWith' n11n headerParser rowParser lineParser footerParser = try $ do
   let widths = if null indices
                then replicate (length aligns) 0.0
                else widthsFromIndices numColumns indices
-  return $ toTableComponents' n11n aligns widths <$> heads <*> lines'
+  return $ toTableComponentsWithSpans' n11n aligns widths <$> heads <*> lines'
 
-toRow :: [Blocks] -> Row
-toRow =  Row nullAttr . map B.simpleCell
+toRow :: [(Blocks, RowSpan, ColSpan)] -> Row
+toRow =  Row nullAttr . map (\(blocks, rowSpan, columnSpan) -> B.cell AlignDefault rowSpan columnSpan blocks)
 
-toHeaderRow :: TableNormalization -> [Blocks] -> Maybe Row
+toHeaderRow :: TableNormalization -> [(Blocks, RowSpan, ColSpan)] -> Maybe Row
 toHeaderRow = \case
   NoNormalization -> \l -> if not (null l) then Just (toRow l) else Nothing
-  NormalizeHeader -> \l -> if not (all null l) then Just (toRow l) else Nothing
+  NormalizeHeader -> \l -> if not (all nullHeaderRow l) then Just (toRow l) else Nothing
+  where
+    nullHeaderRow (l, _, _) = null l
 
 -- | Calculate relative widths of table columns, based on indices
 widthsFromIndices :: Int      -- Number of columns on terminal
@@ -250,3 +295,14 @@ widthsFromIndices numColumns' indices =
                    else fromIntegral numColumns
       fracs = map (\l -> fromIntegral l / quotient) lengths in
   drop 1 fracs
+
+-- | List of lists of `RowSpan` of 1.
+singleRowSpans :: [[RowSpan]]
+singleRowSpans = repeat $ repeat 1
+
+-- | List of lists of `ColsSpan` of 1.
+singleColumnSpans :: [[ColSpan]]
+singleColumnSpans = repeat $ repeat 1
+
+singleSpansBlocks :: [[Blocks]] -> [[(Blocks, RowSpan, ColSpan)]]
+singleSpansBlocks blocks = zipWith3 zip3 blocks singleRowSpans singleColumnSpans
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 20f1177c8..989aab3ac 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -23,7 +23,7 @@ import Data.Char (isHexDigit, isSpace, toUpper, isAlphaNum, generalCategory,
                                   DashPunctuation, OtherSymbol))
 import Data.List (deleteFirstsBy, elemIndex, nub, partition, sort, transpose)
 import qualified Data.Map as M
-import Data.Maybe (fromMaybe, maybeToList, isJust)
+import Data.Maybe (fromMaybe, maybeToList, isJust, isNothing, catMaybes)
 import Data.Sequence (ViewR (..), viewr)
 import Data.Text (Text)
 import qualified Data.Text as T
@@ -1358,7 +1358,6 @@ anchor = try $ do
 --    support for them
 --
 -- Simple tables TODO:
---  - column spans
 --  - multiline support
 --  - ensure that rightmost column span does not need to reach end
 --  - require at least 2 columns
@@ -1367,10 +1366,17 @@ dashedLine :: Monad m => Char -> ParsecT Sources st m (Int, Int)
 dashedLine ch = do
   dashes <- many1 (char ch)
   sp     <- many (char ' ')
-  return (length dashes, length $ dashes ++ sp)
+  return (length dashes, length sp)
 
 simpleDashedLines :: Monad m => Char -> ParsecT Sources st m [(Int,Int)]
-simpleDashedLines ch = try $ many1 (dashedLine ch)
+simpleDashedLines ch = do
+  lines' <- try $ many1 (dashedLine ch)
+  return $ addSpaces lines'
+ where
+  addSpaces [] = []
+  addSpaces [(dashes, _)] = [(dashes, dashes)] -- Don't count trailing whitespaces
+  addSpaces ((dashes, sp) : moreLines) =
+    (dashes, dashes + sp) : addSpaces moreLines
 
 -- Parse a table row separator
 simpleTableSep :: Monad m => Char -> RSTParser m Char
@@ -1381,52 +1387,87 @@ simpleTableFooter :: Monad m => RSTParser m Text
 simpleTableFooter = try $ simpleTableSep '=' >> blanklines
 
 -- Parse a raw line and split it into chunks by indices.
-simpleTableRawLine :: Monad m => [Int] -> RSTParser m [Text]
-simpleTableRawLine indices = simpleTableSplitLine indices <$> anyLine
+simpleTableRawLine :: Monad m => [Int] -> RSTParser m [(Text, ColSpan)]
+simpleTableRawLine indices = do
+  row <- rowWithOptionalColSpan
+
+  case simpleTableSplitLine indices row of
+    Just rowLine -> return rowLine
+    Nothing -> Prelude.fail "col spans don't match"
 
-simpleTableRawLineWithInitialEmptyCell :: Monad m => [Int] -> RSTParser m [Text]
+simpleTableRawLineWithInitialEmptyCell :: Monad m => [Int] -> RSTParser m [(Text, ColSpan)]
 simpleTableRawLineWithInitialEmptyCell indices = try $ do
   cs <- simpleTableRawLine indices
   let isEmptyCell = T.all (\c -> c == ' ' || c == '\t')
   case cs of
-    c:_ | isEmptyCell c -> return cs
+    c:_ | isEmptyCell (fst c) -> return cs
     _ -> mzero
 
 -- Parse a table row and return a list of blocks (columns).
-simpleTableRow :: PandocMonad m => [Int] -> RSTParser m [Blocks]
+simpleTableRow :: PandocMonad m => [Int] -> RSTParser m [(Blocks, RowSpan, ColSpan)]
 simpleTableRow indices = do
   notFollowedBy' (blanklines <|> simpleTableFooter)
   firstLine <- simpleTableRawLine indices
   conLines  <- many $ simpleTableRawLineWithInitialEmptyCell indices
-  let cols = map T.unlines . transpose $ firstLine : conLines ++
+  let cols = map T.unlines . transpose $ (map fst firstLine) : (map (map fst) conLines) ++
                                   [replicate (length indices) ""
                                     | not (null conLines)]
-  mapM parseCell cols
+  let rowParser = mapM parseCell cols
+  fmap (\blocks -> zip3 blocks (repeat 1) (map snd firstLine)) rowParser
 
-simpleTableSplitLine :: [Int] -> Text -> [Text]
-simpleTableSplitLine indices line =
-  map trimr $ drop 1 $ splitTextByIndices (init indices) line
+simpleTableSplitLine :: [Int] -> (Text, Maybe [Int]) -> Maybe [(Text, ColSpan)]
+simpleTableSplitLine indices (line, maybeColspanIndices) =
+  fmap (zip tableLines) columnSpans
+ where
+  splitTableLines lineIndices = map trimr $ drop 1 $ splitTextByIndices (init lineIndices) line
+  (tableLines, columnSpans) = case maybeColspanIndices of
+      Nothing -> (splitTableLines indices, Just $ repeat 1)
+      Just colSpanIndices -> (splitTableLines colSpanIndices, colSpans indices colSpanIndices)
 
 simpleTableHeader :: PandocMonad m
                   => Bool  -- ^ Headerless table
-                  -> RSTParser m ([[Blocks]], [Alignment], [Int])
+                  -> RSTParser m ([[(Blocks, RowSpan, ColSpan)]], [Alignment], [Int])
 simpleTableHeader headless = try $ do
   optional blanklines
-  rawContent  <- if headless
-                    then return [""]
-                    else simpleTableSep '=' >> many1 (notFollowedBy (simpleDashedLines '=') >> anyLine)
-  dashes      <- if headless
-                    then simpleDashedLines '='
-                    else simpleDashedLines '=' <|> simpleDashedLines '-'
+  dashes <- simpleDashedLines '='
   newline
-  let lines'   = map snd dashes
-  let indices  = scanl (+) 0 lines'
+
+  rawContent  <- if headless
+                    then return [("", Nothing)]
+                    else many1 $ notFollowedBy (simpleDashedLines '=') >> rowWithOptionalColSpan
+
+  if headless
+    then return ' '
+    else simpleTableSep '='
+
+  let (lines', indices) = dashedLinesToLinesWithIndices dashes
   let aligns   = replicate (length lines') AlignDefault
   let rawHeads = if headless
                     then []
                     else map (simpleTableSplitLine indices) rawContent
-  heads <- mapM ( mapM $ parseFromString' (mconcat <$> many plain) . trim) rawHeads
-  return (heads, aligns, indices)
+
+  when (any isNothing rawHeads) $ Prelude.fail "col spans don't match"
+
+  let justRawHeads  = catMaybes rawHeads
+  let rawHeads'     = map fst <$> justRawHeads
+  let columnSpans   = map snd <$> justRawHeads
+  heads <- mapM (mapM $ parseFromString' (mconcat <$> many plain) . trim) rawHeads'
+  let headsWithSpans = zipWith3 zip3 heads singleRowSpans columnSpans
+  return (headsWithSpans, aligns, indices)
+
+rowWithOptionalColSpan :: Monad m
+                       => RSTParser m (Text, Maybe [Int])
+rowWithOptionalColSpan = do
+  line <- anyLine
+  colSpanHyphens <- optionMaybe $ do
+    colHyphens <- simpleDashedLines '-'
+    newline
+    return colHyphens
+
+  let colSpan = fmap colSpanFromHyphens colSpanHyphens
+  return (line, colSpan)
+ where
+  colSpanFromHyphens colSpanHyphens = snd $ dashedLinesToLinesWithIndices colSpanHyphens
 
 -- Parse a simple table.
 simpleTable :: PandocMonad m
@@ -1435,7 +1476,7 @@ simpleTable :: PandocMonad m
 simpleTable headless = do
   let wrapIdFst (a, b, c) = (Identity a, b, c)
       wrapId = fmap Identity
-  tbl <- runIdentity <$> tableWith
+  tbl <- runIdentity <$> tableWithSpans
            (wrapIdFst <$> simpleTableHeader headless)
            (wrapId <$> simpleTableRow)
            sep simpleTableFooter
@@ -1806,3 +1847,37 @@ inlineAnchor = try $ do
     s{ stateKeys = M.insert (toKey name) (("#" <> ident, ""), nullAttr)
                     (stateKeys s) }
   pure $ B.spanWith (ident,[],[]) (B.text name)
+
+dashedLinesToLinesWithIndices :: [(Int, Int)] -> ([Int], [Int])
+dashedLinesToLinesWithIndices dashes =
+  let lines'  = map snd dashes
+      indices = scanl (+) 0 lines'
+  in  (lines', indices)
+
+-- | Determines column spans by appying indices of a table border with column span indices.
+--
+-- The indices need to align.
+colSpans :: [Int] -> [Int] -> Maybe [ColSpan]
+colSpans [] []  = Just []
+colSpans [] _   = Nothing
+colSpans _ []   = Nothing
+colSpans (index : indices) colSpanIndices@(colIndex : colSpanIndicesTail)
+    | index /= colIndex = colSpans indices colSpanIndices
+    | otherwise =
+        -- For a matching index start counting the column spans.
+        let (spanCount, remainingIndices, remainingColSpanindices) = colSpanCount indices colSpanIndicesTail 1
+        in  (:) spanCount <$> colSpans remainingIndices remainingColSpanindices
+
+-- | Counts column spans by consuming all non-matching indices until a matching one is encountered.
+--
+-- If the indices match, the end of a column span has been encountered and the
+-- column count can be returned. Otherwise, if the indices don't match, add to
+-- the span count until a matching index is found.
+colSpanCount :: [Int] -> [Int] -> ColSpan -> (ColSpan, [Int], [Int])
+colSpanCount [] colSpanIndices spanCount = (spanCount, [], colSpanIndices)
+colSpanCount _ [] spanCount = (spanCount, [], [])
+colSpanCount indices@(index : indicesTail) colSpanIndices@(colIndex : colSpanIndicesTail) spanCount
+    | index == colIndex = case colSpanIndicesTail of
+        [] -> (spanCount, indicesTail, colSpanIndicesTail)
+        _ -> (spanCount, indices, colSpanIndices)
+    | otherwise = colSpanCount indicesTail colSpanIndices $ spanCount + 1
author	Tuong Nguyen Manh <[email protected]>	2025-09-06 18:20:32 +0200
committer	John MacFarlane <[email protected]>	2025-09-09 10:27:18 +0200
commit	47e1cc46c67e937f7202d98de1d9bdd857f7b454 (patch)
tree	0cec278c9f9339d94766576157da635e96f84035 /src/Text
parent	ff12c20f20067c55f8f55bb66e710b1d8910ad3a (diff)