aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorTuong Nguyen Manh <[email protected]>2025-09-06 18:20:32 +0200
committerJohn MacFarlane <[email protected]>2025-09-09 10:27:18 +0200
commit47e1cc46c67e937f7202d98de1d9bdd857f7b454 (patch)
tree0cec278c9f9339d94766576157da635e96f84035 /src/Text
parentff12c20f20067c55f8f55bb66e710b1d8910ad3a (diff)
RST Reader: Add col spans for simple tables
[API change] Text.Pandoc.Parsing: New functions `tableWithSpans`, `tableWithSpans'`, `toTableComponentsWithSpans` and `toTableComponentsWithSpans'` take a list of lists of (Blocks, RowSpan, ColSpan) to parse a Table with different RowSpan and ColSpan values accordingly. New helper functions `singleRowSpans` and `singleColumnSpans` help set all RowSpans or ColSpans to be 1 in case the table format only allows setting one or the other.
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Parsing.hs12
-rw-r--r--src/Text/Pandoc/Parsing/GridTable.hs68
-rw-r--r--src/Text/Pandoc/Readers/RST.hs127
3 files changed, 175 insertions, 32 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 67fcc363d..ddd409d0b 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -61,6 +61,8 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
lineBlockLines,
tableWith,
tableWith',
+ tableWithSpans,
+ tableWithSpans',
widthsFromIndices,
gridTableWith,
gridTableWith',
@@ -68,6 +70,10 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources,
TableNormalization (..),
toTableComponents,
toTableComponents',
+ toTableComponentsWithSpans,
+ toTableComponentsWithSpans',
+ singleRowSpans,
+ singleColumnSpans,
readWith,
readWithM,
testStringWith,
@@ -303,8 +309,14 @@ import Text.Pandoc.Parsing.GridTable
gridTableWith',
tableWith,
tableWith',
+ tableWithSpans,
+ tableWithSpans',
toTableComponents,
toTableComponents',
+ toTableComponentsWithSpans,
+ toTableComponentsWithSpans',
+ singleRowSpans,
+ singleColumnSpans,
widthsFromIndices,
TableComponents(..),
TableNormalization(..) )
diff --git a/src/Text/Pandoc/Parsing/GridTable.hs b/src/Text/Pandoc/Parsing/GridTable.hs
index aa4a063a0..43871ef0f 100644
--- a/src/Text/Pandoc/Parsing/GridTable.hs
+++ b/src/Text/Pandoc/Parsing/GridTable.hs
@@ -14,12 +14,18 @@ module Text.Pandoc.Parsing.GridTable
, gridTableWith'
, tableWith
, tableWith'
+ , tableWithSpans
+ , tableWithSpans'
, widthsFromIndices
-- * Components of a plain-text table
, TableComponents (..)
, TableNormalization (..)
, toTableComponents
, toTableComponents'
+ , toTableComponentsWithSpans
+ , toTableComponentsWithSpans'
+ , singleRowSpans
+ , singleColumnSpans
)
where
@@ -65,6 +71,19 @@ toTableComponents' :: TableNormalization
-> [Alignment] -> [Double] -> [[Blocks]] -> [[Blocks]]
-> TableComponents
toTableComponents' normalization aligns widths heads rows =
+ toTableComponentsWithSpans' normalization aligns widths (singleSpansBlocks heads) (singleSpansBlocks rows)
+
+-- | Bundles basic table components with span information into a single value.
+toTableComponentsWithSpans :: [Alignment] -> [Double] -> [[(Blocks, RowSpan, ColSpan)]] -> [[(Blocks, RowSpan, ColSpan)]]
+ -> TableComponents
+toTableComponentsWithSpans = toTableComponentsWithSpans' NoNormalization
+
+-- | Bundles basic table components with span information into a single value,
+-- performing normalizations as necessary.
+toTableComponentsWithSpans' :: TableNormalization
+ -> [Alignment] -> [Double] -> [[(Blocks, RowSpan, ColSpan)]] -> [[(Blocks, RowSpan, ColSpan)]]
+ -> TableComponents
+toTableComponentsWithSpans' normalization aligns widths heads rows =
let th = TableHead nullAttr (mapMaybe (toHeaderRow normalization) heads)
tb = TableBody nullAttr 0 [] (map toRow rows)
tf = TableFoot nullAttr []
@@ -208,7 +227,31 @@ tableWith' :: (Stream s m Char, UpdateSourcePos s Char,
-> ParsecT s st m sep -- ^ line parser
-> ParsecT s st m end -- ^ footer parser
-> ParsecT s st m (mf TableComponents)
-tableWith' n11n headerParser rowParser lineParser footerParser = try $ do
+tableWith' n11n headerParser rowParser lineParser footerParser =
+ tableWithSpans' n11n headerParser' rowParser' lineParser footerParser
+ where
+ headerParser' = fmap (\(heads, aligns, indices) -> (fmap singleSpansBlocks heads, aligns, indices)) headerParser
+ rowParser' indices = fmap (\row -> zip3 row (repeat 1) (repeat 1)) <$> rowParser indices
+
+tableWithSpans :: (Stream s m Char, UpdateSourcePos s Char,
+ HasReaderOptions st, Monad mf)
+ => ParsecT s st m (mf [[(Blocks, RowSpan, ColSpan)]], [Alignment], [Int]) -- ^ header parser
+ -> ([Int] -> ParsecT s st m (mf [(Blocks, RowSpan, ColSpan)])) -- ^ row parser
+ -> ParsecT s st m sep -- ^ line parser
+ -> ParsecT s st m end -- ^ footer parser
+ -> ParsecT s st m (mf Blocks)
+tableWithSpans hp rp lp fp = fmap tableFromComponents <$>
+ tableWithSpans' NoNormalization hp rp lp fp
+
+tableWithSpans' :: (Stream s m Char, UpdateSourcePos s Char,
+ HasReaderOptions st, Monad mf)
+ => TableNormalization
+ -> ParsecT s st m (mf [[(Blocks, RowSpan, ColSpan)]], [Alignment], [Int]) -- ^ header parser
+ -> ([Int] -> ParsecT s st m (mf [(Blocks, RowSpan, ColSpan)])) -- ^ row parser
+ -> ParsecT s st m sep -- ^ line parser
+ -> ParsecT s st m end -- ^ footer parser
+ -> ParsecT s st m (mf TableComponents)
+tableWithSpans' n11n headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser
footerParser
@@ -216,15 +259,17 @@ tableWith' n11n headerParser rowParser lineParser footerParser = try $ do
let widths = if null indices
then replicate (length aligns) 0.0
else widthsFromIndices numColumns indices
- return $ toTableComponents' n11n aligns widths <$> heads <*> lines'
+ return $ toTableComponentsWithSpans' n11n aligns widths <$> heads <*> lines'
-toRow :: [Blocks] -> Row
-toRow = Row nullAttr . map B.simpleCell
+toRow :: [(Blocks, RowSpan, ColSpan)] -> Row
+toRow = Row nullAttr . map (\(blocks, rowSpan, columnSpan) -> B.cell AlignDefault rowSpan columnSpan blocks)
-toHeaderRow :: TableNormalization -> [Blocks] -> Maybe Row
+toHeaderRow :: TableNormalization -> [(Blocks, RowSpan, ColSpan)] -> Maybe Row
toHeaderRow = \case
NoNormalization -> \l -> if not (null l) then Just (toRow l) else Nothing
- NormalizeHeader -> \l -> if not (all null l) then Just (toRow l) else Nothing
+ NormalizeHeader -> \l -> if not (all nullHeaderRow l) then Just (toRow l) else Nothing
+ where
+ nullHeaderRow (l, _, _) = null l
-- | Calculate relative widths of table columns, based on indices
widthsFromIndices :: Int -- Number of columns on terminal
@@ -250,3 +295,14 @@ widthsFromIndices numColumns' indices =
else fromIntegral numColumns
fracs = map (\l -> fromIntegral l / quotient) lengths in
drop 1 fracs
+
+-- | List of lists of `RowSpan` of 1.
+singleRowSpans :: [[RowSpan]]
+singleRowSpans = repeat $ repeat 1
+
+-- | List of lists of `ColsSpan` of 1.
+singleColumnSpans :: [[ColSpan]]
+singleColumnSpans = repeat $ repeat 1
+
+singleSpansBlocks :: [[Blocks]] -> [[(Blocks, RowSpan, ColSpan)]]
+singleSpansBlocks blocks = zipWith3 zip3 blocks singleRowSpans singleColumnSpans
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 20f1177c8..989aab3ac 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -23,7 +23,7 @@ import Data.Char (isHexDigit, isSpace, toUpper, isAlphaNum, generalCategory,
DashPunctuation, OtherSymbol))
import Data.List (deleteFirstsBy, elemIndex, nub, partition, sort, transpose)
import qualified Data.Map as M
-import Data.Maybe (fromMaybe, maybeToList, isJust)
+import Data.Maybe (fromMaybe, maybeToList, isJust, isNothing, catMaybes)
import Data.Sequence (ViewR (..), viewr)
import Data.Text (Text)
import qualified Data.Text as T
@@ -1358,7 +1358,6 @@ anchor = try $ do
-- support for them
--
-- Simple tables TODO:
--- - column spans
-- - multiline support
-- - ensure that rightmost column span does not need to reach end
-- - require at least 2 columns
@@ -1367,10 +1366,17 @@ dashedLine :: Monad m => Char -> ParsecT Sources st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many (char ' ')
- return (length dashes, length $ dashes ++ sp)
+ return (length dashes, length sp)
simpleDashedLines :: Monad m => Char -> ParsecT Sources st m [(Int,Int)]
-simpleDashedLines ch = try $ many1 (dashedLine ch)
+simpleDashedLines ch = do
+ lines' <- try $ many1 (dashedLine ch)
+ return $ addSpaces lines'
+ where
+ addSpaces [] = []
+ addSpaces [(dashes, _)] = [(dashes, dashes)] -- Don't count trailing whitespaces
+ addSpaces ((dashes, sp) : moreLines) =
+ (dashes, dashes + sp) : addSpaces moreLines
-- Parse a table row separator
simpleTableSep :: Monad m => Char -> RSTParser m Char
@@ -1381,52 +1387,87 @@ simpleTableFooter :: Monad m => RSTParser m Text
simpleTableFooter = try $ simpleTableSep '=' >> blanklines
-- Parse a raw line and split it into chunks by indices.
-simpleTableRawLine :: Monad m => [Int] -> RSTParser m [Text]
-simpleTableRawLine indices = simpleTableSplitLine indices <$> anyLine
+simpleTableRawLine :: Monad m => [Int] -> RSTParser m [(Text, ColSpan)]
+simpleTableRawLine indices = do
+ row <- rowWithOptionalColSpan
+
+ case simpleTableSplitLine indices row of
+ Just rowLine -> return rowLine
+ Nothing -> Prelude.fail "col spans don't match"
-simpleTableRawLineWithInitialEmptyCell :: Monad m => [Int] -> RSTParser m [Text]
+simpleTableRawLineWithInitialEmptyCell :: Monad m => [Int] -> RSTParser m [(Text, ColSpan)]
simpleTableRawLineWithInitialEmptyCell indices = try $ do
cs <- simpleTableRawLine indices
let isEmptyCell = T.all (\c -> c == ' ' || c == '\t')
case cs of
- c:_ | isEmptyCell c -> return cs
+ c:_ | isEmptyCell (fst c) -> return cs
_ -> mzero
-- Parse a table row and return a list of blocks (columns).
-simpleTableRow :: PandocMonad m => [Int] -> RSTParser m [Blocks]
+simpleTableRow :: PandocMonad m => [Int] -> RSTParser m [(Blocks, RowSpan, ColSpan)]
simpleTableRow indices = do
notFollowedBy' (blanklines <|> simpleTableFooter)
firstLine <- simpleTableRawLine indices
conLines <- many $ simpleTableRawLineWithInitialEmptyCell indices
- let cols = map T.unlines . transpose $ firstLine : conLines ++
+ let cols = map T.unlines . transpose $ (map fst firstLine) : (map (map fst) conLines) ++
[replicate (length indices) ""
| not (null conLines)]
- mapM parseCell cols
+ let rowParser = mapM parseCell cols
+ fmap (\blocks -> zip3 blocks (repeat 1) (map snd firstLine)) rowParser
-simpleTableSplitLine :: [Int] -> Text -> [Text]
-simpleTableSplitLine indices line =
- map trimr $ drop 1 $ splitTextByIndices (init indices) line
+simpleTableSplitLine :: [Int] -> (Text, Maybe [Int]) -> Maybe [(Text, ColSpan)]
+simpleTableSplitLine indices (line, maybeColspanIndices) =
+ fmap (zip tableLines) columnSpans
+ where
+ splitTableLines lineIndices = map trimr $ drop 1 $ splitTextByIndices (init lineIndices) line
+ (tableLines, columnSpans) = case maybeColspanIndices of
+ Nothing -> (splitTableLines indices, Just $ repeat 1)
+ Just colSpanIndices -> (splitTableLines colSpanIndices, colSpans indices colSpanIndices)
simpleTableHeader :: PandocMonad m
=> Bool -- ^ Headerless table
- -> RSTParser m ([[Blocks]], [Alignment], [Int])
+ -> RSTParser m ([[(Blocks, RowSpan, ColSpan)]], [Alignment], [Int])
simpleTableHeader headless = try $ do
optional blanklines
- rawContent <- if headless
- then return [""]
- else simpleTableSep '=' >> many1 (notFollowedBy (simpleDashedLines '=') >> anyLine)
- dashes <- if headless
- then simpleDashedLines '='
- else simpleDashedLines '=' <|> simpleDashedLines '-'
+ dashes <- simpleDashedLines '='
newline
- let lines' = map snd dashes
- let indices = scanl (+) 0 lines'
+
+ rawContent <- if headless
+ then return [("", Nothing)]
+ else many1 $ notFollowedBy (simpleDashedLines '=') >> rowWithOptionalColSpan
+
+ if headless
+ then return ' '
+ else simpleTableSep '='
+
+ let (lines', indices) = dashedLinesToLinesWithIndices dashes
let aligns = replicate (length lines') AlignDefault
let rawHeads = if headless
then []
else map (simpleTableSplitLine indices) rawContent
- heads <- mapM ( mapM $ parseFromString' (mconcat <$> many plain) . trim) rawHeads
- return (heads, aligns, indices)
+
+ when (any isNothing rawHeads) $ Prelude.fail "col spans don't match"
+
+ let justRawHeads = catMaybes rawHeads
+ let rawHeads' = map fst <$> justRawHeads
+ let columnSpans = map snd <$> justRawHeads
+ heads <- mapM (mapM $ parseFromString' (mconcat <$> many plain) . trim) rawHeads'
+ let headsWithSpans = zipWith3 zip3 heads singleRowSpans columnSpans
+ return (headsWithSpans, aligns, indices)
+
+rowWithOptionalColSpan :: Monad m
+ => RSTParser m (Text, Maybe [Int])
+rowWithOptionalColSpan = do
+ line <- anyLine
+ colSpanHyphens <- optionMaybe $ do
+ colHyphens <- simpleDashedLines '-'
+ newline
+ return colHyphens
+
+ let colSpan = fmap colSpanFromHyphens colSpanHyphens
+ return (line, colSpan)
+ where
+ colSpanFromHyphens colSpanHyphens = snd $ dashedLinesToLinesWithIndices colSpanHyphens
-- Parse a simple table.
simpleTable :: PandocMonad m
@@ -1435,7 +1476,7 @@ simpleTable :: PandocMonad m
simpleTable headless = do
let wrapIdFst (a, b, c) = (Identity a, b, c)
wrapId = fmap Identity
- tbl <- runIdentity <$> tableWith
+ tbl <- runIdentity <$> tableWithSpans
(wrapIdFst <$> simpleTableHeader headless)
(wrapId <$> simpleTableRow)
sep simpleTableFooter
@@ -1806,3 +1847,37 @@ inlineAnchor = try $ do
s{ stateKeys = M.insert (toKey name) (("#" <> ident, ""), nullAttr)
(stateKeys s) }
pure $ B.spanWith (ident,[],[]) (B.text name)
+
+dashedLinesToLinesWithIndices :: [(Int, Int)] -> ([Int], [Int])
+dashedLinesToLinesWithIndices dashes =
+ let lines' = map snd dashes
+ indices = scanl (+) 0 lines'
+ in (lines', indices)
+
+-- | Determines column spans by appying indices of a table border with column span indices.
+--
+-- The indices need to align.
+colSpans :: [Int] -> [Int] -> Maybe [ColSpan]
+colSpans [] [] = Just []
+colSpans [] _ = Nothing
+colSpans _ [] = Nothing
+colSpans (index : indices) colSpanIndices@(colIndex : colSpanIndicesTail)
+ | index /= colIndex = colSpans indices colSpanIndices
+ | otherwise =
+ -- For a matching index start counting the column spans.
+ let (spanCount, remainingIndices, remainingColSpanindices) = colSpanCount indices colSpanIndicesTail 1
+ in (:) spanCount <$> colSpans remainingIndices remainingColSpanindices
+
+-- | Counts column spans by consuming all non-matching indices until a matching one is encountered.
+--
+-- If the indices match, the end of a column span has been encountered and the
+-- column count can be returned. Otherwise, if the indices don't match, add to
+-- the span count until a matching index is found.
+colSpanCount :: [Int] -> [Int] -> ColSpan -> (ColSpan, [Int], [Int])
+colSpanCount [] colSpanIndices spanCount = (spanCount, [], colSpanIndices)
+colSpanCount _ [] spanCount = (spanCount, [], [])
+colSpanCount indices@(index : indicesTail) colSpanIndices@(colIndex : colSpanIndicesTail) spanCount
+ | index == colIndex = case colSpanIndicesTail of
+ [] -> (spanCount, indicesTail, colSpanIndicesTail)
+ _ -> (spanCount, indices, colSpanIndices)
+ | otherwise = colSpanCount indicesTail colSpanIndices $ spanCount + 1