diff options
| author | Ruqi <[email protected]> | 2022-08-30 14:51:09 +0700 |
|---|---|---|
| committer | GitHub <[email protected]> | 2022-08-30 09:51:09 +0200 |
| commit | 8b5fb5019897138c9e65c1c8ec73feaf341c3efa (patch) | |
| tree | 96bef76af709e449b28eaeb681de50613baa81c9 | |
| parent | fecd3366d8d9850131c5b0a2cbab53c65a030405 (diff) | |
Mediawiki reader: Parse table cell with attribs, to support rowspan, colspan (#8231)
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/MediaWiki.hs | 42 | ||||
| -rw-r--r-- | test/command/2649.md | 17 | ||||
| -rw-r--r-- | test/mediawiki-reader.native | 84 |
4 files changed, 47 insertions, 97 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 4fb6028e4..4e2be9382 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -21,6 +21,7 @@ module Text.Pandoc.Readers.HTML ( readHtml , isBlockTag , isTextTag , isCommentTag + , toAttr ) where import Control.Applicative ((<|>)) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 0fdfa8b84..8e86a0634 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -35,7 +35,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (nested, tableCaption) -import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) +import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag, toAttr) import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim, splitTextBy, tshow, formatCode) import Text.Pandoc.XML (fromEntities) @@ -222,22 +222,21 @@ table = do optional rowsep hasheader <- option False $ True <$ lookAhead (skipSpaces *> char '!') (cellspecs',hdr) <- unzip <$> tableRow - let widths = map ((tableWidth *) . snd) cellspecs' + let widths = map (tableWidth *) cellspecs' let restwidth = tableWidth - sum widths let zerocols = length $ filter (==0.0) widths let defaultwidth = if zerocols == 0 || zerocols == length widths then ColWidthDefault else ColWidth $ restwidth / fromIntegral zerocols let widths' = map (\w -> if w > 0 then ColWidth w else defaultwidth) widths - let cellspecs = zip (map fst cellspecs') widths' + let cellspecs = zip (calculateAlignments hdr) widths' rows' <- many $ try $ rowsep *> (map snd <$> tableRow) optional blanklines tableEnd - let cols = length hdr let (headers,rows) = if hasheader then (hdr, rows') - else (replicate cols mempty, hdr:rows') - let toRow = Row nullAttr . map B.simpleCell + else ([], hdr:rows') + let toRow = Row nullAttr toHeaderRow l = [toRow l | not (null l)] return $ B.table (B.simpleCaption $ B.plain caption) cellspecs @@ -245,6 +244,12 @@ table = do [TableBody nullAttr 0 [] $ map toRow rows] (TableFoot nullAttr []) +calculateAlignments :: [Cell] -> [Alignment] +calculateAlignments = map cellAligns + where + cellAligns :: Cell -> Alignment + cellAligns (Cell _ align _ _ _) = align + parseAttrs :: PandocMonad m => MWParser m [(Text,Text)] parseAttrs = many1 parseAttr @@ -252,7 +257,9 @@ parseAttr :: PandocMonad m => MWParser m (Text, Text) parseAttr = try $ do skipMany spaceChar k <- many1Char letter + skipMany spaceChar char '=' + skipMany spaceChar v <- (char '"' >> many1TillChar (satisfy (/='\n')) (char '"')) <|> many1Char (satisfy $ \c -> not (isSpace c) && c /= '|') return (k,v) @@ -289,6 +296,7 @@ cellsep = try $ do tableCaption :: PandocMonad m => MWParser m Inlines tableCaption = try $ do + optional rowsep guardColumnOne skipSpaces sym "|+" @@ -296,14 +304,14 @@ tableCaption = try $ do trimInlines . mconcat <$> many (notFollowedBy (cellsep <|> rowsep) *> inline) -tableRow :: PandocMonad m => MWParser m [((Alignment, Double), Blocks)] +tableRow :: PandocMonad m => MWParser m [(Double, Cell)] tableRow = try $ skipMany htmlComment *> many tableCell -tableCell :: PandocMonad m => MWParser m ((Alignment, Double), Blocks) +tableCell :: PandocMonad m => MWParser m (Double, Cell) tableCell = try $ do cellsep skipMany spaceChar - attrs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <* + attribs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <* notFollowedBy (char '|') skipMany spaceChar pos' <- getPosition @@ -311,15 +319,25 @@ tableCell = try $ do ((snd <$> withRaw table) <|> countChar 1 anyChar)) bs <- parseFromString (do setPosition pos' mconcat <$> many block) ls - let align = case lookup "align" attrs of + let align = case lookup "align" attribs of Just "left" -> AlignLeft Just "right" -> AlignRight Just "center" -> AlignCenter _ -> AlignDefault - let width = case lookup "width" attrs of + let width = case lookup "width" attribs of Just xs -> fromMaybe 0.0 $ parseWidth xs Nothing -> 0.0 - return ((align, width), bs) + let rowspan = RowSpan . fromMaybe 1 $ + safeRead =<< lookup "rowspan" attribs + let colspan = ColSpan . fromMaybe 1 $ + safeRead =<< lookup "colspan" attribs + let handledAttribs = ["align", "colspan", "rowspan"] + attribs' = foldr go [] attribs + go kv@(k, _) acc = case k of + -- drop attrib if it's already handled + _ | k `elem` handledAttribs -> acc + _ -> kv : acc + return (width, B.cellWith (toAttr attribs') align rowspan colspan bs) parseWidth :: Text -> Maybe Double parseWidth s = diff --git a/test/command/2649.md b/test/command/2649.md index 4ab059ea0..7225ae0f4 100644 --- a/test/command/2649.md +++ b/test/command/2649.md @@ -17,7 +17,7 @@ % pandoc -f mediawiki -t html5 {| border="4" cellspacing="2" cellpadding="0" WIDTH="100%" |----- -| peildatum Simbase || november 2005 || '''uitslagen Flohrgambiet''' || +| peildatum Simbase || november 2005 || colspan=2 | '''uitslagen Flohrgambiet''' || |----- | totaal aantal partijen Simbase || 7.316.773 | wit wint || 53% @@ -34,8 +34,7 @@ <tr class="odd"> <td><p>peildatum Simbase</p></td> <td><p>november 2005</p></td> -<td><p><strong>uitslagen Flohrgambiet</strong></p></td> -<td></td> +<td colspan="2"><p><strong>uitslagen Flohrgambiet</strong></p></td> </tr> <tr class="even"> <td><p>totaal aantal partijen Simbase</p></td> @@ -90,20 +89,20 @@ <tbody> <tr class="odd"> <td><p>1</p></td> -<td><p><a href="Sébastien_Loeb" title="wikilink">Sébastien -Loeb</a></p></td> +<td style="text-align: left;"><p><a href="Sébastien_Loeb" +title="wikilink">Sébastien Loeb</a></p></td> <td><p>78</p></td> </tr> <tr class="even"> <td><p>2</p></td> -<td><p><strong><a href="Sébastien_Ogier" title="wikilink">Sébastien -Ogier</a></strong></p></td> +<td style="text-align: left;"><p><strong><a href="Sébastien_Ogier" +title="wikilink">Sébastien Ogier</a></strong></p></td> <td><p>38</p></td> </tr> <tr class="odd"> <td><p>10</p></td> -<td><p><a href="Hannu_Mikkola" title="wikilink">Hannu -Mikkola</a></p></td> +<td style="text-align: left;"><p><a href="Hannu_Mikkola" +title="wikilink">Hannu Mikkola</a></p></td> <td><p>18</p></td> </tr> </tbody> diff --git a/test/mediawiki-reader.native b/test/mediawiki-reader.native index 184809ea6..9b0a61b4d 100644 --- a/test/mediawiki-reader.native +++ b/test/mediawiki-reader.native @@ -820,24 +820,7 @@ Pandoc [ ( AlignDefault , ColWidthDefault ) , ( AlignDefault , ColWidthDefault ) ] - (TableHead - ( "" , [] , [] ) - [ Row - ( "" , [] , [] ) - [ Cell - ( "" , [] , [] ) - AlignDefault - (RowSpan 1) - (ColSpan 1) - [] - , Cell - ( "" , [] , [] ) - AlignDefault - (RowSpan 1) - (ColSpan 1) - [] - ] - ]) + (TableHead ( "" , [] , [] ) []) [ TableBody ( "" , [] , [] ) (RowHeadColumns 0) @@ -1016,30 +999,7 @@ Pandoc , ( AlignDefault , ColWidthDefault ) , ( AlignDefault , ColWidthDefault ) ] - (TableHead - ( "" , [] , [] ) - [ Row - ( "" , [] , [] ) - [ Cell - ( "" , [] , [] ) - AlignDefault - (RowSpan 1) - (ColSpan 1) - [] - , Cell - ( "" , [] , [] ) - AlignDefault - (RowSpan 1) - (ColSpan 1) - [] - , Cell - ( "" , [] , [] ) - AlignDefault - (RowSpan 1) - (ColSpan 1) - [] - ] - ]) + (TableHead ( "" , [] , [] ) []) [ TableBody ( "" , [] , [] ) (RowHeadColumns 0) @@ -1122,20 +1082,20 @@ Pandoc [ Row ( "" , [] , [] ) [ Cell - ( "" , [] , [] ) - AlignDefault + ( "" , [] , [ ( "width" , "50%" ) ] ) + AlignLeft (RowSpan 1) (ColSpan 1) [ Para [ Str "Left" ] ] , Cell ( "" , [] , [] ) - AlignDefault + AlignRight (RowSpan 1) (ColSpan 1) [ Para [ Str "Right" ] ] , Cell ( "" , [] , [] ) - AlignDefault + AlignCenter (RowSpan 1) (ColSpan 1) [ Para [ Str "Center" ] ] @@ -1196,24 +1156,7 @@ Pandoc [ ( AlignDefault , ColWidthDefault ) , ( AlignDefault , ColWidthDefault ) ] - (TableHead - ( "" , [] , [] ) - [ Row - ( "" , [] , [] ) - [ Cell - ( "" , [] , [] ) - AlignDefault - (RowSpan 1) - (ColSpan 1) - [] - , Cell - ( "" , [] , [] ) - AlignDefault - (RowSpan 1) - (ColSpan 1) - [] - ] - ]) + (TableHead ( "" , [] , [] ) []) [ TableBody ( "" , [] , [] ) (RowHeadColumns 0) @@ -1321,18 +1264,7 @@ Pandoc ( "" , [] , [] ) (Caption Nothing []) [ ( AlignDefault , ColWidthDefault ) ] - (TableHead - ( "" , [] , [] ) - [ Row - ( "" , [] , [] ) - [ Cell - ( "" , [] , [] ) - AlignDefault - (RowSpan 1) - (ColSpan 1) - [] - ] - ]) + (TableHead ( "" , [] , [] ) []) [ TableBody ( "" , [] , [] ) (RowHeadColumns 0) |
