diff options
| author | Ruqi <[email protected]> | 2022-08-30 14:51:09 +0700 |
|---|---|---|
| committer | GitHub <[email protected]> | 2022-08-30 09:51:09 +0200 |
| commit | 8b5fb5019897138c9e65c1c8ec73feaf341c3efa (patch) | |
| tree | 96bef76af709e449b28eaeb681de50613baa81c9 /src/Text | |
| parent | fecd3366d8d9850131c5b0a2cbab53c65a030405 (diff) | |
Mediawiki reader: Parse table cell with attribs, to support rowspan, colspan (#8231)
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/MediaWiki.hs | 42 |
2 files changed, 31 insertions, 12 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 4fb6028e4..4e2be9382 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -21,6 +21,7 @@ module Text.Pandoc.Readers.HTML ( readHtml , isBlockTag , isTextTag , isCommentTag + , toAttr ) where import Control.Applicative ((<|>)) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 0fdfa8b84..8e86a0634 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -35,7 +35,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (nested, tableCaption) -import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) +import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag, toAttr) import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim, splitTextBy, tshow, formatCode) import Text.Pandoc.XML (fromEntities) @@ -222,22 +222,21 @@ table = do optional rowsep hasheader <- option False $ True <$ lookAhead (skipSpaces *> char '!') (cellspecs',hdr) <- unzip <$> tableRow - let widths = map ((tableWidth *) . snd) cellspecs' + let widths = map (tableWidth *) cellspecs' let restwidth = tableWidth - sum widths let zerocols = length $ filter (==0.0) widths let defaultwidth = if zerocols == 0 || zerocols == length widths then ColWidthDefault else ColWidth $ restwidth / fromIntegral zerocols let widths' = map (\w -> if w > 0 then ColWidth w else defaultwidth) widths - let cellspecs = zip (map fst cellspecs') widths' + let cellspecs = zip (calculateAlignments hdr) widths' rows' <- many $ try $ rowsep *> (map snd <$> tableRow) optional blanklines tableEnd - let cols = length hdr let (headers,rows) = if hasheader then (hdr, rows') - else (replicate cols mempty, hdr:rows') - let toRow = Row nullAttr . map B.simpleCell + else ([], hdr:rows') + let toRow = Row nullAttr toHeaderRow l = [toRow l | not (null l)] return $ B.table (B.simpleCaption $ B.plain caption) cellspecs @@ -245,6 +244,12 @@ table = do [TableBody nullAttr 0 [] $ map toRow rows] (TableFoot nullAttr []) +calculateAlignments :: [Cell] -> [Alignment] +calculateAlignments = map cellAligns + where + cellAligns :: Cell -> Alignment + cellAligns (Cell _ align _ _ _) = align + parseAttrs :: PandocMonad m => MWParser m [(Text,Text)] parseAttrs = many1 parseAttr @@ -252,7 +257,9 @@ parseAttr :: PandocMonad m => MWParser m (Text, Text) parseAttr = try $ do skipMany spaceChar k <- many1Char letter + skipMany spaceChar char '=' + skipMany spaceChar v <- (char '"' >> many1TillChar (satisfy (/='\n')) (char '"')) <|> many1Char (satisfy $ \c -> not (isSpace c) && c /= '|') return (k,v) @@ -289,6 +296,7 @@ cellsep = try $ do tableCaption :: PandocMonad m => MWParser m Inlines tableCaption = try $ do + optional rowsep guardColumnOne skipSpaces sym "|+" @@ -296,14 +304,14 @@ tableCaption = try $ do trimInlines . mconcat <$> many (notFollowedBy (cellsep <|> rowsep) *> inline) -tableRow :: PandocMonad m => MWParser m [((Alignment, Double), Blocks)] +tableRow :: PandocMonad m => MWParser m [(Double, Cell)] tableRow = try $ skipMany htmlComment *> many tableCell -tableCell :: PandocMonad m => MWParser m ((Alignment, Double), Blocks) +tableCell :: PandocMonad m => MWParser m (Double, Cell) tableCell = try $ do cellsep skipMany spaceChar - attrs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <* + attribs <- option [] $ try $ parseAttrs <* skipSpaces <* char '|' <* notFollowedBy (char '|') skipMany spaceChar pos' <- getPosition @@ -311,15 +319,25 @@ tableCell = try $ do ((snd <$> withRaw table) <|> countChar 1 anyChar)) bs <- parseFromString (do setPosition pos' mconcat <$> many block) ls - let align = case lookup "align" attrs of + let align = case lookup "align" attribs of Just "left" -> AlignLeft Just "right" -> AlignRight Just "center" -> AlignCenter _ -> AlignDefault - let width = case lookup "width" attrs of + let width = case lookup "width" attribs of Just xs -> fromMaybe 0.0 $ parseWidth xs Nothing -> 0.0 - return ((align, width), bs) + let rowspan = RowSpan . fromMaybe 1 $ + safeRead =<< lookup "rowspan" attribs + let colspan = ColSpan . fromMaybe 1 $ + safeRead =<< lookup "colspan" attribs + let handledAttribs = ["align", "colspan", "rowspan"] + attribs' = foldr go [] attribs + go kv@(k, _) acc = case k of + -- drop attrib if it's already handled + _ | k `elem` handledAttribs -> acc + _ -> kv : acc + return (width, B.cellWith (toAttr attribs') align rowspan colspan bs) parseWidth :: Text -> Maybe Double parseWidth s = |
