diff options
| author | John MacFarlane <[email protected]> | 2024-06-22 18:48:55 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2024-06-22 18:48:55 -0700 |
| commit | bee5b1fcd1d5900832b0e7e9bdcc81c8a3f5fab4 (patch) | |
| tree | 97ee019f81114c985a59e8678a9acc81318a4f58 /src/Text | |
| parent | 2b60b1a1bc062f37174cb1c29178c5aa02f7c651 (diff) | |
HTML writer: ensure URI escaping needed for html4.
Unicode characters need not be escaped for html5, and still won't be.
See #9905.
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 47 |
1 files changed, 29 insertions, 18 deletions
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index e6c228eb0..c03ffaac2 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -34,7 +34,7 @@ import Control.Monad.State.Strict ( StateT, MonadState(get), gets, modify, evalStateT ) import Control.Monad ( liftM, when, foldM, unless ) import Control.Monad.Trans ( MonadTrans(lift) ) -import Data.Char (ord) +import Data.Char (ord, isSpace, isAscii) import Data.List (intercalate, intersperse, partition, delete, (\\), foldl') import Data.List.NonEmpty (NonEmpty((:|))) import Data.Containers.ListUtils (nubOrd) @@ -43,7 +43,8 @@ import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL -import Network.URI (URI (..), parseURIReference) +import Network.URI (URI (..), parseURIReference, escapeURIString) +import Text.Pandoc.URI (urlEncode) import Numeric (showHex) import Text.DocLayout (render, literal, Doc) import Text.Blaze.Internal (MarkupM (Empty), customLeaf, customParent) @@ -61,7 +62,6 @@ import Text.Pandoc.Walk import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared import qualified Text.Pandoc.Writers.AnnotatedTable as Ann -import Text.Pandoc.URI (urlEncode) import Text.Pandoc.XML (escapeStringForXML, fromEntities, toEntities, html5Attributes, html4Attributes, rdfaAttributes) import qualified Text.Blaze.XHtml5 as H5 @@ -308,12 +308,13 @@ pandocToHtml opts (Pandoc meta blocks) = do modify (\st' -> st'{ stNotes = mempty, stEmittedNotes = stEmittedNotes st' + length (stNotes st') }) return notes st <- get + let html5 = stHtml5 st let thebody = blocks' >> notes let math = layoutMarkup $ case writerHTMLMathMethod opts of MathJax url | slideVariant /= RevealJsSlides -> -- mathjax is handled via a special plugin in revealjs - H.script ! A.src (toValue url) + H.script ! A.src (toValue $ toURI html5 url) ! A.type_ "text/javascript" $ case slideVariant of SlideousSlides -> @@ -323,7 +324,7 @@ pandocToHtml opts (Pandoc meta blocks) = do KaTeX url -> do H.script ! A.defer mempty ! - A.src (toValue $ url <> "katex.min.js") $ mempty + A.src (toValue $ toURI html5 $ url <> "katex.min.js") $ mempty nl let katexFlushLeft = case lookupContext "classoption" metadata of @@ -346,7 +347,7 @@ pandocToHtml opts (Pandoc meta blocks) = do ] nl H.link ! A.rel "stylesheet" ! - A.href (toValue $ url <> "katex.min.css") + A.href (toValue $ toURI html5 url <> "katex.min.css") _ -> mempty let mCss :: Maybe [Text] = lookupContext "css" metadata @@ -593,12 +594,14 @@ parseMailto s = obfuscateLink :: PandocMonad m => WriterOptions -> Attr -> Html -> Text -> StateT WriterState m Html -obfuscateLink opts attr txt s | writerEmailObfuscation opts == NoObfuscation = - addAttrs opts attr $ H.a ! A.href (toValue s) $ txt -obfuscateLink opts attr (TL.toStrict . renderHtml -> txt) s = +obfuscateLink opts attr txt s | writerEmailObfuscation opts == NoObfuscation = do + html5 <- gets stHtml5 + addAttrs opts attr $ H.a ! A.href (toValue $ toURI html5 s) $ txt +obfuscateLink opts attr (TL.toStrict . renderHtml -> txt) s = do + html5 <- gets stHtml5 let meth = writerEmailObfuscation opts - s' = T.toLower (T.take 7 s) <> T.drop 7 s - in case parseMailto s' of + let s' = T.toLower (T.take 7 s) <> T.drop 7 s + case parseMailto s' of (Just (name', domain)) -> let domain' = T.replace "." " dot " domain at' = obfuscateChar '@' @@ -626,7 +629,8 @@ obfuscateLink opts attr (TL.toStrict . renderHtml -> txt) s = linkText <> "+'<\\/'+'a'+'>');\n// -->\n")) >> H.noscript (preEscapedText $ obfuscateString altText) _ -> throwError $ PandocSomeError $ "Unknown obfuscation method: " <> tshow meth - _ -> addAttrs opts attr $ H.a ! A.href (toValue s) $ toHtml txt -- malformed email + _ -> addAttrs opts attr $ H.a ! A.href (toValue $ toURI html5 s) + $ toHtml txt -- malformed email -- | Obfuscate character as entity. obfuscateChar :: Char -> Text @@ -1488,8 +1492,7 @@ inlineToHtml opts inline = do InlineMath -> "\\textstyle " DisplayMath -> "\\displaystyle " return $ imtag ! A.style "vertical-align:middle" - ! A.src (toValue . (url <>) . - urlEncode $ s <> str') + ! A.src (toValue . (url <>) . urlEncode $ s <> str') ! A.alt (toValue str') ! A.title (toValue str') ! A.class_ mathClass @@ -1551,7 +1554,8 @@ inlineToHtml opts inline = do else writerIdentifierPrefix opts in "#" <> prefix <> xs _ -> s - let link = H.a ! A.href (toValue s') $ linkText + let link = H.a ! A.href (toValue $ toURI html5 s') + $ linkText link' <- addAttrs opts (ident, classes, kvs) link return $ if T.null tit then link' @@ -1566,7 +1570,7 @@ inlineToHtml opts inline = do -- reveal.js uses data-src for lazy loading (if isReveal then customAttribute "data-src" $ toValue s - else A.src $ toValue s) : + else A.src $ toValue $ toURI html5 s) : [A.title $ toValue tit | not (T.null tit)] ++ attrs imageTag = (if html5 then H5.img else H.img @@ -1577,7 +1581,8 @@ inlineToHtml opts inline = do let linkTxt = if null txt then fallbackTxt else alternate - in (tg $ H.a ! A.href (toValue s) $ toHtml linkTxt + in (tg $ H.a ! A.href (toValue $ toURI html5 s) + $ toHtml linkTxt , [A5.controls ""] ) s' = fromMaybe s $ T.stripSuffix ".gz" s normSrc = maybe (T.unpack s) uriPath (parseURIReference $ T.unpack s') @@ -1600,7 +1605,7 @@ inlineToHtml opts inline = do modify $ \st -> st {stNotes = htmlContents:notes} slideVariant <- gets stSlideVariant let revealSlash = T.pack ['/' | slideVariant == RevealJsSlides] - let link = H.a ! A.href (toValue $ "#" <> + let link = H.a ! A.href (toValue $ toURI html5 $ "#" <> revealSlash <> writerIdentifierPrefix opts <> "fn" <> ref) ! A.class_ "footnote-ref" @@ -1760,3 +1765,9 @@ removeLinks = walk go where go (Link attr ils _) = Span attr ils go x = x + +toURI :: Bool -> Text -> Text +toURI isHtml5 t = if isHtml5 then t else escapeURI t + where + escapeURI = T.pack . escapeURIString (not . needsEscaping) . T.unpack + needsEscaping c = isSpace c || T.any (== c) "<>|\"{}[]^`" || not (isAscii c) |
