diff options
| author | John MacFarlane <[email protected]> | 2024-08-31 10:16:00 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2024-08-31 10:17:00 -0700 |
| commit | 49e82f9a7fa22e8138cb6aa59d5a1fca7b2f3204 (patch) | |
| tree | 5df4cb208d974c68196c041a1a2534a89507a815 /src/Text | |
| parent | 7a83cac6c3bb38e629a78af03fd0b5f75c636c88 (diff) | |
HTML reader: better handle KaTeX-generated math.
KaTeX emits the MathML followed by a span with an HTML fallback.
Previously pandoc was converting both. We now ignore the HTML
fallback span, marked with class `katex-html`.
Closes #9971.
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 38 |
1 file changed, 20 insertions, 18 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 5ee990db4..6fcf574c4 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -512,7 +512,7 @@ pIframe = try $ do | "image/" `T.isPrefixOf` mt -> do return $ B.divWith ("",["iframe"],[]) $ B.plain $ B.image url "" mempty - _ -> return $ B.divWith ("",["iframe"],[("src", url)]) $ mempty) + _ -> return $ B.divWith ("",["iframe"],[("src", url)]) mempty) (\e -> do logMessage $ CouldNotFetchResource url (renderError e) ignore $ renderTags' [tag, TagClose "iframe"]) @@ -852,23 +852,25 @@ pSpan :: PandocMonad m => TagParser m Inlines pSpan = do (TagOpen _ attr') <- lookAhead (pSatisfy $ tagOpen (=="span") (const True)) exts <- getOption readerExtensions - if extensionEnabled Ext_native_spans exts - then do - contents <- pInTags "span" inline - let attr = toAttr attr' - let classes = maybe [] T.words $ lookup "class" attr' - let styleAttr = fromMaybe "" $ lookup "style" attr' - let fontVariant = fromMaybe "" $ - pickStyleAttrProps ["font-variant"] styleAttr - let isSmallCaps = fontVariant == "small-caps" || - "smallcaps" `elem` classes - let tag = if isSmallCaps then B.smallcaps else B.spanWith attr - return $ tag contents - else if extensionEnabled Ext_raw_html exts - then do - tag <- pSatisfy $ tagOpen (=="span") (const True) - return $ B.rawInline "html" $ renderTags' [tag] - else pInTags "span" inline -- just contents + let attr = toAttr attr' + case attr of + (_,["katex-html"],_) -> mempty <$ pInTags "span" inline + -- skip HTML generated by KaTeX, since we get + -- the math by parsing mathml (#9971) + _ | extensionEnabled Ext_native_spans exts -> do + contents <- pInTags "span" inline + let classes = maybe [] T.words $ lookup "class" attr' + let styleAttr = fromMaybe "" $ lookup "style" attr' + let fontVariant = fromMaybe "" $ + pickStyleAttrProps ["font-variant"] styleAttr + let isSmallCaps = fontVariant == "small-caps" || + "smallcaps" `elem` classes + 
let tag = if isSmallCaps then B.smallcaps else B.spanWith attr + return $ tag contents + | extensionEnabled Ext_raw_html exts -> do + tag <- pSatisfy $ tagOpen (=="span") (const True) + return $ B.rawInline "html" $ renderTags' [tag] + | otherwise -> pInTags "span" inline -- just contents pRawHtmlInline :: PandocMonad m => TagParser m Inlines pRawHtmlInline = do |
