about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John MacFarlane <[email protected]> 2025-12-29 23:49:51 -0700
committer John MacFarlane <[email protected]> 2025-12-29 23:49:51 -0700
commit 7cadf231970c1a9ea03b596525511fc6ffef09d4 (patch)
tree b4103423acaf296e6a7680b160b76da25b12716a
parent e5ee5ed04ff75c577d25eaeb0f70ee3936dfe84e (diff)
MediaWiki reader: handle non-recognized tags as plain text.
Partially addresses #11299.
-rw-r--r-- src/Text/Pandoc/Readers/MediaWiki.hs 110
-rw-r--r-- test/command/11299.md 6
2 files changed, 99 insertions, 17 deletions
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index be7bdc4a2..d6e01d703 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -36,7 +36,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (tableCaption)
-import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag, toAttr)
+import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag, toAttr)
import Text.Pandoc.Shared (formatCode, safeRead, splitTextBy, stringify,
stripTrailingNewlines, trim, tshow)
import Text.Pandoc.XML (fromEntities)
@@ -104,21 +104,86 @@ newBlockTags :: [Text]
newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"]
isBlockTag' :: Tag Text -> Bool
-isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) &&
- t `notElem` eitherBlockOrInline
-isBlockTag' (TagClose "ref") = True -- needed so 'special' doesn't parse it
-isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) &&
- t `notElem` eitherBlockOrInline
-isBlockTag' tag = isBlockTag tag
+isBlockTag' (TagOpen t _) = isBlockTagName t
+isBlockTag' (TagClose t) = isBlockTagName t
+isBlockTag' _ = False
+
+isBlockTagName :: Text -> Bool
+isBlockTagName t =
+ t `elem` [ "blockquote"
+ , "caption"
+ , "col"
+ , "colgroup"
+ , "dd"
+ , "div"
+ , "dl"
+ , "dt"
+ , "h1"
+ , "h2"
+ , "h3"
+ , "h4"
+ , "h5"
+ , "h6"
+ , "hr"
+ , "li"
+ , "meta"
+ , "ol"
+ , "p"
+ , "pre"
+ , "rp"
+ , "table"
+ , "td"
+ , "th"
+ , "time"
+ , "tr"
+ , "ul"
+ , "center"
+ ] || t `elem` newBlockTags
isInlineTag' :: Tag Text -> Bool
-isInlineTag' (TagComment _) = True
-isInlineTag' (TagClose "ref") = False -- see below inlineTag
-isInlineTag' t = not (isBlockTag' t)
-
-eitherBlockOrInline :: [Text]
-eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
- "map", "area", "object"]
+isInlineTag' (TagComment _) = True
+isInlineTag' (TagOpen t _) = isInlineTagName t
+isInlineTag' (TagClose t) = isInlineTagName t
+isInlineTag' _ = False
+
+isInlineTagName :: Text -> Bool
+isInlineTagName t =
+ t `elem` [ "abbr"
+ , "b"
+ , "bdi"
+ , "bdo"
+ , "big"
+ , "br"
+ , "cite"
+ , "code"
+ , "data"
+ , "del"
+ , "dfn"
+ , "em"
+ , "i"
+ , "ins"
+ , "kbd"
+ , "link"
+ , "mark"
+ , "q"
+ , "rt"
+ , "ruby"
+ , "s"
+ , "samp"
+ , "small"
+ , "span"
+ , "strong"
+ , "sub"
+ , "sup"
+ , "u"
+ , "var"
+ , "wbr"
+ , "font"
+ , "rb"
+ , "rtc"
+ , "strike"
+ , "tt"
+ ]
htmlComment :: PandocMonad m => MWParser m ()
htmlComment = () <$ htmlTag isCommentTag
@@ -575,7 +640,11 @@ singleParaToPlain bs =
inlineTag :: PandocMonad m => MWParser m Inlines
inlineTag = do
- (tag, _) <- lookAhead $ htmlTag isInlineTag'
+ (tag, _) <- lookAhead $ htmlTag (\tag -> case tag of
+ TagOpen "hask" _ -> True
+ TagOpen "ref" _ -> True
+ TagOpen "nowiki" _ -> True
+ _ -> isInlineTag' tag)
case tag of
TagOpen "ref" _ -> B.note . singleParaToPlain <$> blocksInTags "ref"
TagOpen "nowiki" _ -> try $ do
@@ -601,8 +670,15 @@ inlineTag = do
_ -> B.rawInline "html" . snd <$> htmlTag (~== tag)
special :: PandocMonad m => MWParser m Inlines
-special = B.str <$> countChar 1 (notFollowedBy' (htmlTag isBlockTag') *>
- oneOf specialChars)
+special = B.str . T.singleton <$>
+ (notFollowedBy' (htmlTag (\t -> isInlineTag' t ||
+ isBlockTag' t ||
+ case t of
+ TagClose "ref" -> True
+ TagClose "hask" -> True
+ TagClose "nowiki" -> True
+ _ -> False)
+ ) *> oneOf specialChars)
inlineHtml :: PandocMonad m => MWParser m Inlines
inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag'
diff --git a/test/command/11299.md b/test/command/11299.md
new file mode 100644
index 000000000..3c7995cb2
--- /dev/null
+++ b/test/command/11299.md
@@ -0,0 +1,6 @@
+```
+% pandoc -f mediawiki -t native
+<foo>
+^D
+[ Para [ Str "<foo>" ] ]
+```