aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRuqi <[email protected]>2022-08-30 14:51:09 +0700
committerGitHub <[email protected]>2022-08-30 09:51:09 +0200
commit8b5fb5019897138c9e65c1c8ec73feaf341c3efa (patch)
tree96bef76af709e449b28eaeb681de50613baa81c9
parentfecd3366d8d9850131c5b0a2cbab53c65a030405 (diff)
Mediawiki reader: Parse table cell with attribs, to support rowspan, colspan (#8231)
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs1
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs42
-rw-r--r--test/command/2649.md17
-rw-r--r--test/mediawiki-reader.native84
4 files changed, 47 insertions, 97 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 4fb6028e4..4e2be9382 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -21,6 +21,7 @@ module Text.Pandoc.Readers.HTML ( readHtml
, isBlockTag
, isTextTag
, isCommentTag
+ , toAttr
) where
import Control.Applicative ((<|>))
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 0fdfa8b84..8e86a0634 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -35,7 +35,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (nested, tableCaption)
-import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
+import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag, toAttr)
import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines,
trim, splitTextBy, tshow, formatCode)
import Text.Pandoc.XML (fromEntities)
@@ -222,22 +222,21 @@ table = do
optional rowsep
hasheader <- option False $ True <$ lookAhead (skipSpaces *> char '!')
(cellspecs',hdr) <- unzip <$> tableRow
- let widths = map ((tableWidth *) . snd) cellspecs'
+ let widths = map (tableWidth *) cellspecs'
let restwidth = tableWidth - sum widths
let zerocols = length $ filter (==0.0) widths
let defaultwidth = if zerocols == 0 || zerocols == length widths
then ColWidthDefault
else ColWidth $ restwidth / fromIntegral zerocols
let widths' = map (\w -> if w > 0 then ColWidth w else defaultwidth) widths
- let cellspecs = zip (map fst cellspecs') widths'
+ let cellspecs = zip (calculateAlignments hdr) widths'
rows' <- many $ try $ rowsep *> (map snd <$> tableRow)
optional blanklines
tableEnd
- let cols = length hdr
let (headers,rows) = if hasheader
then (hdr, rows')
- else (replicate cols mempty, hdr:rows')
- let toRow = Row nullAttr . map B.simpleCell
+ else ([], hdr:rows')
+ let toRow = Row nullAttr
toHeaderRow l = [toRow l | not (null l)]
return $ B.table (B.simpleCaption $ B.plain caption)
cellspecs
@@ -245,6 +244,12 @@ table = do
[TableBody nullAttr 0 [] $ map toRow rows]
(TableFoot nullAttr [])
+calculateAlignments :: [Cell] -> [Alignment]
+calculateAlignments = map cellAligns
+ where
+ cellAligns :: Cell -> Alignment
+ cellAligns (Cell _ align _ _ _) = align
+
parseAttrs :: PandocMonad m => MWParser m [(Text,Text)]
parseAttrs = many1 parseAttr
@@ -252,7 +257,9 @@ parseAttr :: PandocMonad m => MWParser m (Text, Text)
parseAttr = try $ do
skipMany spaceChar
k <- many1Char letter
+ skipMany spaceChar
char '='
+ skipMany spaceChar
v <- (char '"' >> many1TillChar (satisfy (/='\n')) (char '"'))
<|> many1Char (satisfy $ \c -> not (isSpace c) && c /= '|')
return (k,v)
@@ -289,6 +296,7 @@ cellsep = try $ do
tableCaption :: PandocMonad m => MWParser m Inlines
tableCaption = try $ do
+ optional rowsep
guardColumnOne
skipSpaces
sym "|+"
@@ -296,14 +304,14 @@ tableCaption = try $ do
trimInlines . mconcat <$>
many (notFollowedBy (cellsep <|> rowsep) *> inline)
-tableRow :: PandocMonad m => MWParser m [((Alignment, Double), Blocks)]
+tableRow :: PandocMonad m => MWParser m [(Double, Cell)]
tableRow = try $ skipMany htmlComment *> many tableCell
-tableCell :: PandocMonad m => MWParser m ((Alignment, Double), Blocks)
+tableCell :: PandocMonad m => MWParser m (Double, Cell)
tableCell = try $ do
cellsep
skipMany spaceChar
- attrs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <*
+ attribs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <*
notFollowedBy (char '|')
skipMany spaceChar
pos' <- getPosition
@@ -311,15 +319,25 @@ tableCell = try $ do
((snd <$> withRaw table) <|> countChar 1 anyChar))
bs <- parseFromString (do setPosition pos'
mconcat <$> many block) ls
- let align = case lookup "align" attrs of
+ let align = case lookup "align" attribs of
Just "left" -> AlignLeft
Just "right" -> AlignRight
Just "center" -> AlignCenter
_ -> AlignDefault
- let width = case lookup "width" attrs of
+ let width = case lookup "width" attribs of
Just xs -> fromMaybe 0.0 $ parseWidth xs
Nothing -> 0.0
- return ((align, width), bs)
+ let rowspan = RowSpan . fromMaybe 1 $
+ safeRead =<< lookup "rowspan" attribs
+ let colspan = ColSpan . fromMaybe 1 $
+ safeRead =<< lookup "colspan" attribs
+ let handledAttribs = ["align", "colspan", "rowspan"]
+ attribs' = foldr go [] attribs
+ go kv@(k, _) acc = case k of
+ -- drop attrib if it's already handled
+ _ | k `elem` handledAttribs -> acc
+ _ -> kv : acc
+ return (width, B.cellWith (toAttr attribs') align rowspan colspan bs)
parseWidth :: Text -> Maybe Double
parseWidth s =
diff --git a/test/command/2649.md b/test/command/2649.md
index 4ab059ea0..7225ae0f4 100644
--- a/test/command/2649.md
+++ b/test/command/2649.md
@@ -17,7 +17,7 @@
% pandoc -f mediawiki -t html5
{| border="4" cellspacing="2" cellpadding="0" WIDTH="100%"
|-----
-| peildatum Simbase || november 2005 || '''uitslagen Flohrgambiet''' ||
+| peildatum Simbase || november 2005 || colspan=2 | '''uitslagen Flohrgambiet''' ||
|-----
| totaal aantal partijen Simbase || 7.316.773
| wit wint || 53%
@@ -34,8 +34,7 @@
<tr class="odd">
<td><p>peildatum Simbase</p></td>
<td><p>november 2005</p></td>
-<td><p><strong>uitslagen Flohrgambiet</strong></p></td>
-<td></td>
+<td colspan="2"><p><strong>uitslagen Flohrgambiet</strong></p></td>
</tr>
<tr class="even">
<td><p>totaal aantal partijen Simbase</p></td>
@@ -90,20 +89,20 @@
<tbody>
<tr class="odd">
<td><p>1</p></td>
-<td><p><a href="Sébastien_Loeb" title="wikilink">Sébastien
-Loeb</a></p></td>
+<td style="text-align: left;"><p><a href="Sébastien_Loeb"
+title="wikilink">Sébastien Loeb</a></p></td>
<td><p>78</p></td>
</tr>
<tr class="even">
<td><p>2</p></td>
-<td><p><strong><a href="Sébastien_Ogier" title="wikilink">Sébastien
-Ogier</a></strong></p></td>
+<td style="text-align: left;"><p><strong><a href="Sébastien_Ogier"
+title="wikilink">Sébastien Ogier</a></strong></p></td>
<td><p>38</p></td>
</tr>
<tr class="odd">
<td><p>10</p></td>
-<td><p><a href="Hannu_Mikkola" title="wikilink">Hannu
-Mikkola</a></p></td>
+<td style="text-align: left;"><p><a href="Hannu_Mikkola"
+title="wikilink">Hannu Mikkola</a></p></td>
<td><p>18</p></td>
</tr>
</tbody>
diff --git a/test/mediawiki-reader.native b/test/mediawiki-reader.native
index 184809ea6..9b0a61b4d 100644
--- a/test/mediawiki-reader.native
+++ b/test/mediawiki-reader.native
@@ -820,24 +820,7 @@ Pandoc
[ ( AlignDefault , ColWidthDefault )
, ( AlignDefault , ColWidthDefault )
]
- (TableHead
- ( "" , [] , [] )
- [ Row
- ( "" , [] , [] )
- [ Cell
- ( "" , [] , [] )
- AlignDefault
- (RowSpan 1)
- (ColSpan 1)
- []
- , Cell
- ( "" , [] , [] )
- AlignDefault
- (RowSpan 1)
- (ColSpan 1)
- []
- ]
- ])
+ (TableHead ( "" , [] , [] ) [])
[ TableBody
( "" , [] , [] )
(RowHeadColumns 0)
@@ -1016,30 +999,7 @@ Pandoc
, ( AlignDefault , ColWidthDefault )
, ( AlignDefault , ColWidthDefault )
]
- (TableHead
- ( "" , [] , [] )
- [ Row
- ( "" , [] , [] )
- [ Cell
- ( "" , [] , [] )
- AlignDefault
- (RowSpan 1)
- (ColSpan 1)
- []
- , Cell
- ( "" , [] , [] )
- AlignDefault
- (RowSpan 1)
- (ColSpan 1)
- []
- , Cell
- ( "" , [] , [] )
- AlignDefault
- (RowSpan 1)
- (ColSpan 1)
- []
- ]
- ])
+ (TableHead ( "" , [] , [] ) [])
[ TableBody
( "" , [] , [] )
(RowHeadColumns 0)
@@ -1122,20 +1082,20 @@ Pandoc
[ Row
( "" , [] , [] )
[ Cell
- ( "" , [] , [] )
- AlignDefault
+ ( "" , [] , [ ( "width" , "50%" ) ] )
+ AlignLeft
(RowSpan 1)
(ColSpan 1)
[ Para [ Str "Left" ] ]
, Cell
( "" , [] , [] )
- AlignDefault
+ AlignRight
(RowSpan 1)
(ColSpan 1)
[ Para [ Str "Right" ] ]
, Cell
( "" , [] , [] )
- AlignDefault
+ AlignCenter
(RowSpan 1)
(ColSpan 1)
[ Para [ Str "Center" ] ]
@@ -1196,24 +1156,7 @@ Pandoc
[ ( AlignDefault , ColWidthDefault )
, ( AlignDefault , ColWidthDefault )
]
- (TableHead
- ( "" , [] , [] )
- [ Row
- ( "" , [] , [] )
- [ Cell
- ( "" , [] , [] )
- AlignDefault
- (RowSpan 1)
- (ColSpan 1)
- []
- , Cell
- ( "" , [] , [] )
- AlignDefault
- (RowSpan 1)
- (ColSpan 1)
- []
- ]
- ])
+ (TableHead ( "" , [] , [] ) [])
[ TableBody
( "" , [] , [] )
(RowHeadColumns 0)
@@ -1321,18 +1264,7 @@ Pandoc
( "" , [] , [] )
(Caption Nothing [])
[ ( AlignDefault , ColWidthDefault ) ]
- (TableHead
- ( "" , [] , [] )
- [ Row
- ( "" , [] , [] )
- [ Cell
- ( "" , [] , [] )
- AlignDefault
- (RowSpan 1)
- (ColSpan 1)
- []
- ]
- ])
+ (TableHead ( "" , [] , [] ) [])
[ TableBody
( "" , [] , [] )
(RowHeadColumns 0)