diff options
| author | Tuong Nguyen Manh <[email protected]> | 2025-09-06 18:20:32 +0200 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2025-09-09 10:27:18 +0200 |
| commit | 47e1cc46c67e937f7202d98de1d9bdd857f7b454 (patch) | |
| tree | 0cec278c9f9339d94766576157da635e96f84035 /src/Text | |
| parent | ff12c20f20067c55f8f55bb66e710b1d8910ad3a (diff) | |
RST Reader: Add col spans for simple tables
[API change] Text.Pandoc.Parsing: New functions `tableWithSpans`,
`tableWithSpans'`, `toTableComponentsWithSpans` and
`toTableComponentsWithSpans'` take a list of lists of
(Blocks, RowSpan, ColSpan) to parse a Table with different RowSpan and
ColSpan values accordingly.
New helper functions `singleRowSpans` and `singleColumnSpans` help set
all RowSpans or ColSpans to be 1 in case the table format only allows
setting one or the other.
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/Parsing.hs | 12 | ||||
| -rw-r--r-- | src/Text/Pandoc/Parsing/GridTable.hs | 68 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 127 |
3 files changed, 175 insertions, 32 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 67fcc363d..ddd409d0b 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -61,6 +61,8 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources, lineBlockLines, tableWith, tableWith', + tableWithSpans, + tableWithSpans', widthsFromIndices, gridTableWith, gridTableWith', @@ -68,6 +70,10 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources, TableNormalization (..), toTableComponents, toTableComponents', + toTableComponentsWithSpans, + toTableComponentsWithSpans', + singleRowSpans, + singleColumnSpans, readWith, readWithM, testStringWith, @@ -303,8 +309,14 @@ import Text.Pandoc.Parsing.GridTable gridTableWith', tableWith, tableWith', + tableWithSpans, + tableWithSpans', toTableComponents, toTableComponents', + toTableComponentsWithSpans, + toTableComponentsWithSpans', + singleRowSpans, + singleColumnSpans, widthsFromIndices, TableComponents(..), TableNormalization(..) ) diff --git a/src/Text/Pandoc/Parsing/GridTable.hs b/src/Text/Pandoc/Parsing/GridTable.hs index aa4a063a0..43871ef0f 100644 --- a/src/Text/Pandoc/Parsing/GridTable.hs +++ b/src/Text/Pandoc/Parsing/GridTable.hs @@ -14,12 +14,18 @@ module Text.Pandoc.Parsing.GridTable , gridTableWith' , tableWith , tableWith' + , tableWithSpans + , tableWithSpans' , widthsFromIndices -- * Components of a plain-text table , TableComponents (..) , TableNormalization (..) , toTableComponents , toTableComponents' + , toTableComponentsWithSpans + , toTableComponentsWithSpans' + , singleRowSpans + , singleColumnSpans ) where @@ -65,6 +71,19 @@ toTableComponents' :: TableNormalization -> [Alignment] -> [Double] -> [[Blocks]] -> [[Blocks]] -> TableComponents toTableComponents' normalization aligns widths heads rows = + toTableComponentsWithSpans' normalization aligns widths (singleSpansBlocks heads) (singleSpansBlocks rows) + +-- | Bundles basic table components with span information into a single value. +toTableComponentsWithSpans :: [Alignment] -> [Double] -> [[(Blocks, RowSpan, ColSpan)]] -> [[(Blocks, RowSpan, ColSpan)]] + -> TableComponents +toTableComponentsWithSpans = toTableComponentsWithSpans' NoNormalization + +-- | Bundles basic table components with span information into a single value, +-- performing normalizations as necessary. +toTableComponentsWithSpans' :: TableNormalization + -> [Alignment] -> [Double] -> [[(Blocks, RowSpan, ColSpan)]] -> [[(Blocks, RowSpan, ColSpan)]] + -> TableComponents +toTableComponentsWithSpans' normalization aligns widths heads rows = let th = TableHead nullAttr (mapMaybe (toHeaderRow normalization) heads) tb = TableBody nullAttr 0 [] (map toRow rows) tf = TableFoot nullAttr [] @@ -208,7 +227,31 @@ tableWith' :: (Stream s m Char, UpdateSourcePos s Char, -> ParsecT s st m sep -- ^ line parser -> ParsecT s st m end -- ^ footer parser -> ParsecT s st m (mf TableComponents) -tableWith' n11n headerParser rowParser lineParser footerParser = try $ do +tableWith' n11n headerParser rowParser lineParser footerParser = + tableWithSpans' n11n headerParser' rowParser' lineParser footerParser + where + headerParser' = fmap (\(heads, aligns, indices) -> (fmap singleSpansBlocks heads, aligns, indices)) headerParser + rowParser' indices = fmap (\row -> zip3 row (repeat 1) (repeat 1)) <$> rowParser indices + +tableWithSpans :: (Stream s m Char, UpdateSourcePos s Char, + HasReaderOptions st, Monad mf) + => ParsecT s st m (mf [[(Blocks, RowSpan, ColSpan)]], [Alignment], [Int]) -- ^ header parser + -> ([Int] -> ParsecT s st m (mf [(Blocks, RowSpan, ColSpan)])) -- ^ row parser + -> ParsecT s st m sep -- ^ line parser + -> ParsecT s st m end -- ^ footer parser + -> ParsecT s st m (mf Blocks) +tableWithSpans hp rp lp fp = fmap tableFromComponents <$> + tableWithSpans' NoNormalization hp rp lp fp + +tableWithSpans' :: (Stream s m Char, UpdateSourcePos s Char, + HasReaderOptions st, Monad mf) + => TableNormalization + -> ParsecT s st m (mf [[(Blocks, RowSpan, ColSpan)]], [Alignment], [Int]) -- ^ header parser + -> ([Int] -> ParsecT s st m (mf [(Blocks, RowSpan, ColSpan)])) -- ^ row parser + -> ParsecT s st m sep -- ^ line parser + -> ParsecT s st m end -- ^ footer parser + -> ParsecT s st m (mf TableComponents) +tableWithSpans' n11n headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser footerParser @@ -216,15 +259,17 @@ tableWith' n11n headerParser rowParser lineParser footerParser = try $ do let widths = if null indices then replicate (length aligns) 0.0 else widthsFromIndices numColumns indices - return $ toTableComponents' n11n aligns widths <$> heads <*> lines' + return $ toTableComponentsWithSpans' n11n aligns widths <$> heads <*> lines' -toRow :: [Blocks] -> Row -toRow = Row nullAttr . map B.simpleCell +toRow :: [(Blocks, RowSpan, ColSpan)] -> Row +toRow = Row nullAttr . map (\(blocks, rowSpan, columnSpan) -> B.cell AlignDefault rowSpan columnSpan blocks) -toHeaderRow :: TableNormalization -> [Blocks] -> Maybe Row +toHeaderRow :: TableNormalization -> [(Blocks, RowSpan, ColSpan)] -> Maybe Row toHeaderRow = \case NoNormalization -> \l -> if not (null l) then Just (toRow l) else Nothing - NormalizeHeader -> \l -> if not (all null l) then Just (toRow l) else Nothing + NormalizeHeader -> \l -> if not (all nullHeaderRow l) then Just (toRow l) else Nothing + where + nullHeaderRow (l, _, _) = null l -- | Calculate relative widths of table columns, based on indices widthsFromIndices :: Int -- Number of columns on terminal @@ -250,3 +295,14 @@ widthsFromIndices numColumns' indices = else fromIntegral numColumns fracs = map (\l -> fromIntegral l / quotient) lengths in drop 1 fracs + +-- | List of lists of `RowSpan` of 1. +singleRowSpans :: [[RowSpan]] +singleRowSpans = repeat $ repeat 1 + +-- | List of lists of `ColsSpan` of 1. +singleColumnSpans :: [[ColSpan]] +singleColumnSpans = repeat $ repeat 1 + +singleSpansBlocks :: [[Blocks]] -> [[(Blocks, RowSpan, ColSpan)]] +singleSpansBlocks blocks = zipWith3 zip3 blocks singleRowSpans singleColumnSpans diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 20f1177c8..989aab3ac 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -23,7 +23,7 @@ import Data.Char (isHexDigit, isSpace, toUpper, isAlphaNum, generalCategory, DashPunctuation, OtherSymbol)) import Data.List (deleteFirstsBy, elemIndex, nub, partition, sort, transpose) import qualified Data.Map as M -import Data.Maybe (fromMaybe, maybeToList, isJust) +import Data.Maybe (fromMaybe, maybeToList, isJust, isNothing, catMaybes) import Data.Sequence (ViewR (..), viewr) import Data.Text (Text) import qualified Data.Text as T @@ -1358,7 +1358,6 @@ anchor = try $ do -- support for them -- -- Simple tables TODO: --- - column spans -- - multiline support -- - ensure that rightmost column span does not need to reach end -- - require at least 2 columns @@ -1367,10 +1366,17 @@ dashedLine :: Monad m => Char -> ParsecT Sources st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') - return (length dashes, length $ dashes ++ sp) + return (length dashes, length sp) simpleDashedLines :: Monad m => Char -> ParsecT Sources st m [(Int,Int)] -simpleDashedLines ch = try $ many1 (dashedLine ch) +simpleDashedLines ch = do + lines' <- try $ many1 (dashedLine ch) + return $ addSpaces lines' + where + addSpaces [] = [] + addSpaces [(dashes, _)] = [(dashes, dashes)] -- Don't count trailing whitespaces + addSpaces ((dashes, sp) : moreLines) = + (dashes, dashes + sp) : addSpaces moreLines -- Parse a table row separator simpleTableSep :: Monad m => Char -> RSTParser m Char @@ -1381,52 +1387,87 @@ simpleTableFooter :: Monad m => RSTParser m Text simpleTableFooter = try $ simpleTableSep '=' >> blanklines -- Parse a raw line and split it into chunks by indices. -simpleTableRawLine :: Monad m => [Int] -> RSTParser m [Text] -simpleTableRawLine indices = simpleTableSplitLine indices <$> anyLine +simpleTableRawLine :: Monad m => [Int] -> RSTParser m [(Text, ColSpan)] +simpleTableRawLine indices = do + row <- rowWithOptionalColSpan + + case simpleTableSplitLine indices row of + Just rowLine -> return rowLine + Nothing -> Prelude.fail "col spans don't match" -simpleTableRawLineWithInitialEmptyCell :: Monad m => [Int] -> RSTParser m [Text] +simpleTableRawLineWithInitialEmptyCell :: Monad m => [Int] -> RSTParser m [(Text, ColSpan)] simpleTableRawLineWithInitialEmptyCell indices = try $ do cs <- simpleTableRawLine indices let isEmptyCell = T.all (\c -> c == ' ' || c == '\t') case cs of - c:_ | isEmptyCell c -> return cs + c:_ | isEmptyCell (fst c) -> return cs _ -> mzero -- Parse a table row and return a list of blocks (columns). -simpleTableRow :: PandocMonad m => [Int] -> RSTParser m [Blocks] +simpleTableRow :: PandocMonad m => [Int] -> RSTParser m [(Blocks, RowSpan, ColSpan)] simpleTableRow indices = do notFollowedBy' (blanklines <|> simpleTableFooter) firstLine <- simpleTableRawLine indices conLines <- many $ simpleTableRawLineWithInitialEmptyCell indices - let cols = map T.unlines . transpose $ firstLine : conLines ++ + let cols = map T.unlines . transpose $ (map fst firstLine) : (map (map fst) conLines) ++ [replicate (length indices) "" | not (null conLines)] - mapM parseCell cols + let rowParser = mapM parseCell cols + fmap (\blocks -> zip3 blocks (repeat 1) (map snd firstLine)) rowParser -simpleTableSplitLine :: [Int] -> Text -> [Text] -simpleTableSplitLine indices line = - map trimr $ drop 1 $ splitTextByIndices (init indices) line +simpleTableSplitLine :: [Int] -> (Text, Maybe [Int]) -> Maybe [(Text, ColSpan)] +simpleTableSplitLine indices (line, maybeColspanIndices) = + fmap (zip tableLines) columnSpans + where + splitTableLines lineIndices = map trimr $ drop 1 $ splitTextByIndices (init lineIndices) line + (tableLines, columnSpans) = case maybeColspanIndices of + Nothing -> (splitTableLines indices, Just $ repeat 1) + Just colSpanIndices -> (splitTableLines colSpanIndices, colSpans indices colSpanIndices) simpleTableHeader :: PandocMonad m => Bool -- ^ Headerless table - -> RSTParser m ([[Blocks]], [Alignment], [Int]) + -> RSTParser m ([[(Blocks, RowSpan, ColSpan)]], [Alignment], [Int]) simpleTableHeader headless = try $ do optional blanklines - rawContent <- if headless - then return [""] - else simpleTableSep '=' >> many1 (notFollowedBy (simpleDashedLines '=') >> anyLine) - dashes <- if headless - then simpleDashedLines '=' - else simpleDashedLines '=' <|> simpleDashedLines '-' + dashes <- simpleDashedLines '=' newline - let lines' = map snd dashes - let indices = scanl (+) 0 lines' + + rawContent <- if headless + then return [("", Nothing)] + else many1 $ notFollowedBy (simpleDashedLines '=') >> rowWithOptionalColSpan + + if headless + then return ' ' + else simpleTableSep '=' + + let (lines', indices) = dashedLinesToLinesWithIndices dashes let aligns = replicate (length lines') AlignDefault let rawHeads = if headless then [] else map (simpleTableSplitLine indices) rawContent - heads <- mapM ( mapM $ parseFromString' (mconcat <$> many plain) . trim) rawHeads - return (heads, aligns, indices) + + when (any isNothing rawHeads) $ Prelude.fail "col spans don't match" + + let justRawHeads = catMaybes rawHeads + let rawHeads' = map fst <$> justRawHeads + let columnSpans = map snd <$> justRawHeads + heads <- mapM (mapM $ parseFromString' (mconcat <$> many plain) . trim) rawHeads' + let headsWithSpans = zipWith3 zip3 heads singleRowSpans columnSpans + return (headsWithSpans, aligns, indices) + +rowWithOptionalColSpan :: Monad m + => RSTParser m (Text, Maybe [Int]) +rowWithOptionalColSpan = do + line <- anyLine + colSpanHyphens <- optionMaybe $ do + colHyphens <- simpleDashedLines '-' + newline + return colHyphens + + let colSpan = fmap colSpanFromHyphens colSpanHyphens + return (line, colSpan) + where + colSpanFromHyphens colSpanHyphens = snd $ dashedLinesToLinesWithIndices colSpanHyphens -- Parse a simple table. simpleTable :: PandocMonad m @@ -1435,7 +1476,7 @@ simpleTable :: PandocMonad m simpleTable headless = do let wrapIdFst (a, b, c) = (Identity a, b, c) wrapId = fmap Identity - tbl <- runIdentity <$> tableWith + tbl <- runIdentity <$> tableWithSpans (wrapIdFst <$> simpleTableHeader headless) (wrapId <$> simpleTableRow) sep simpleTableFooter @@ -1806,3 +1847,37 @@ inlineAnchor = try $ do s{ stateKeys = M.insert (toKey name) (("#" <> ident, ""), nullAttr) (stateKeys s) } pure $ B.spanWith (ident,[],[]) (B.text name) + +dashedLinesToLinesWithIndices :: [(Int, Int)] -> ([Int], [Int]) +dashedLinesToLinesWithIndices dashes = + let lines' = map snd dashes + indices = scanl (+) 0 lines' + in (lines', indices) + +-- | Determines column spans by appying indices of a table border with column span indices. +-- +-- The indices need to align. +colSpans :: [Int] -> [Int] -> Maybe [ColSpan] +colSpans [] [] = Just [] +colSpans [] _ = Nothing +colSpans _ [] = Nothing +colSpans (index : indices) colSpanIndices@(colIndex : colSpanIndicesTail) + | index /= colIndex = colSpans indices colSpanIndices + | otherwise = + -- For a matching index start counting the column spans. + let (spanCount, remainingIndices, remainingColSpanindices) = colSpanCount indices colSpanIndicesTail 1 + in (:) spanCount <$> colSpans remainingIndices remainingColSpanindices + +-- | Counts column spans by consuming all non-matching indices until a matching one is encountered. +-- +-- If the indices match, the end of a column span has been encountered and the +-- column count can be returned. Otherwise, if the indices don't match, add to +-- the span count until a matching index is found. +colSpanCount :: [Int] -> [Int] -> ColSpan -> (ColSpan, [Int], [Int]) +colSpanCount [] colSpanIndices spanCount = (spanCount, [], colSpanIndices) +colSpanCount _ [] spanCount = (spanCount, [], []) +colSpanCount indices@(index : indicesTail) colSpanIndices@(colIndex : colSpanIndicesTail) spanCount + | index == colIndex = case colSpanIndicesTail of + [] -> (spanCount, indicesTail, colSpanIndicesTail) + _ -> (spanCount, indices, colSpanIndices) + | otherwise = colSpanCount indicesTail colSpanIndices $ spanCount + 1 |
