aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2024-06-22 18:48:55 -0700
committerJohn MacFarlane <[email protected]>2024-06-22 18:48:55 -0700
commitbee5b1fcd1d5900832b0e7e9bdcc81c8a3f5fab4 (patch)
tree97ee019f81114c985a59e8678a9acc81318a4f58 /src/Text
parent2b60b1a1bc062f37174cb1c29178c5aa02f7c651 (diff)
HTML writer: ensure URI escaping needed for html4.
Unicode characters need not be escaped for html5, and still won't be. See #9905.
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs47
1 files changed, 29 insertions, 18 deletions
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index e6c228eb0..c03ffaac2 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -34,7 +34,7 @@ import Control.Monad.State.Strict
( StateT, MonadState(get), gets, modify, evalStateT )
import Control.Monad ( liftM, when, foldM, unless )
import Control.Monad.Trans ( MonadTrans(lift) )
-import Data.Char (ord)
+import Data.Char (ord, isSpace, isAscii)
import Data.List (intercalate, intersperse, partition, delete, (\\), foldl')
import Data.List.NonEmpty (NonEmpty((:|)))
import Data.Containers.ListUtils (nubOrd)
@@ -43,7 +43,8 @@ import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
-import Network.URI (URI (..), parseURIReference)
+import Network.URI (URI (..), parseURIReference, escapeURIString)
+import Text.Pandoc.URI (urlEncode)
import Numeric (showHex)
import Text.DocLayout (render, literal, Doc)
import Text.Blaze.Internal (MarkupM (Empty), customLeaf, customParent)
@@ -61,7 +62,6 @@ import Text.Pandoc.Walk
import Text.Pandoc.Writers.Math
import Text.Pandoc.Writers.Shared
import qualified Text.Pandoc.Writers.AnnotatedTable as Ann
-import Text.Pandoc.URI (urlEncode)
import Text.Pandoc.XML (escapeStringForXML, fromEntities, toEntities,
html5Attributes, html4Attributes, rdfaAttributes)
import qualified Text.Blaze.XHtml5 as H5
@@ -308,12 +308,13 @@ pandocToHtml opts (Pandoc meta blocks) = do
modify (\st' -> st'{ stNotes = mempty, stEmittedNotes = stEmittedNotes st' + length (stNotes st') })
return notes
st <- get
+ let html5 = stHtml5 st
let thebody = blocks' >> notes
let math = layoutMarkup $ case writerHTMLMathMethod opts of
MathJax url
| slideVariant /= RevealJsSlides ->
-- mathjax is handled via a special plugin in revealjs
- H.script ! A.src (toValue url)
+ H.script ! A.src (toValue $ toURI html5 url)
! A.type_ "text/javascript"
$ case slideVariant of
SlideousSlides ->
@@ -323,7 +324,7 @@ pandocToHtml opts (Pandoc meta blocks) = do
KaTeX url -> do
H.script !
A.defer mempty !
- A.src (toValue $ url <> "katex.min.js") $ mempty
+ A.src (toValue $ toURI html5 $ url <> "katex.min.js") $ mempty
nl
let katexFlushLeft =
case lookupContext "classoption" metadata of
@@ -346,7 +347,7 @@ pandocToHtml opts (Pandoc meta blocks) = do
]
nl
H.link ! A.rel "stylesheet" !
- A.href (toValue $ url <> "katex.min.css")
+ A.href (toValue $ toURI html5 url <> "katex.min.css")
_ -> mempty
let mCss :: Maybe [Text] = lookupContext "css" metadata
@@ -593,12 +594,14 @@ parseMailto s =
obfuscateLink :: PandocMonad m
=> WriterOptions -> Attr -> Html -> Text
-> StateT WriterState m Html
-obfuscateLink opts attr txt s | writerEmailObfuscation opts == NoObfuscation =
- addAttrs opts attr $ H.a ! A.href (toValue s) $ txt
-obfuscateLink opts attr (TL.toStrict . renderHtml -> txt) s =
+obfuscateLink opts attr txt s | writerEmailObfuscation opts == NoObfuscation = do
+ html5 <- gets stHtml5
+ addAttrs opts attr $ H.a ! A.href (toValue $ toURI html5 s) $ txt
+obfuscateLink opts attr (TL.toStrict . renderHtml -> txt) s = do
+ html5 <- gets stHtml5
let meth = writerEmailObfuscation opts
- s' = T.toLower (T.take 7 s) <> T.drop 7 s
- in case parseMailto s' of
+ let s' = T.toLower (T.take 7 s) <> T.drop 7 s
+ case parseMailto s' of
(Just (name', domain)) ->
let domain' = T.replace "." " dot " domain
at' = obfuscateChar '@'
@@ -626,7 +629,8 @@ obfuscateLink opts attr (TL.toStrict . renderHtml -> txt) s =
linkText <> "+'<\\/'+'a'+'>');\n// -->\n")) >>
H.noscript (preEscapedText $ obfuscateString altText)
_ -> throwError $ PandocSomeError $ "Unknown obfuscation method: " <> tshow meth
- _ -> addAttrs opts attr $ H.a ! A.href (toValue s) $ toHtml txt -- malformed email
+ _ -> addAttrs opts attr $ H.a ! A.href (toValue $ toURI html5 s)
+ $ toHtml txt -- malformed email
-- | Obfuscate character as entity.
obfuscateChar :: Char -> Text
@@ -1488,8 +1492,7 @@ inlineToHtml opts inline = do
InlineMath -> "\\textstyle "
DisplayMath -> "\\displaystyle "
return $ imtag ! A.style "vertical-align:middle"
- ! A.src (toValue . (url <>) .
- urlEncode $ s <> str')
+ ! A.src (toValue . (url <>) . urlEncode $ s <> str')
! A.alt (toValue str')
! A.title (toValue str')
! A.class_ mathClass
@@ -1551,7 +1554,8 @@ inlineToHtml opts inline = do
else writerIdentifierPrefix opts
in "#" <> prefix <> xs
_ -> s
- let link = H.a ! A.href (toValue s') $ linkText
+ let link = H.a ! A.href (toValue $ toURI html5 s')
+ $ linkText
link' <- addAttrs opts (ident, classes, kvs) link
return $ if T.null tit
then link'
@@ -1566,7 +1570,7 @@ inlineToHtml opts inline = do
-- reveal.js uses data-src for lazy loading
(if isReveal
then customAttribute "data-src" $ toValue s
- else A.src $ toValue s) :
+ else A.src $ toValue $ toURI html5 s) :
[A.title $ toValue tit | not (T.null tit)] ++
attrs
imageTag = (if html5 then H5.img else H.img
@@ -1577,7 +1581,8 @@ inlineToHtml opts inline = do
let linkTxt = if null txt
then fallbackTxt
else alternate
- in (tg $ H.a ! A.href (toValue s) $ toHtml linkTxt
+ in (tg $ H.a ! A.href (toValue $ toURI html5 s)
+ $ toHtml linkTxt
, [A5.controls ""] )
s' = fromMaybe s $ T.stripSuffix ".gz" s
normSrc = maybe (T.unpack s) uriPath (parseURIReference $ T.unpack s')
@@ -1600,7 +1605,7 @@ inlineToHtml opts inline = do
modify $ \st -> st {stNotes = htmlContents:notes}
slideVariant <- gets stSlideVariant
let revealSlash = T.pack ['/' | slideVariant == RevealJsSlides]
- let link = H.a ! A.href (toValue $ "#" <>
+ let link = H.a ! A.href (toValue $ toURI html5 $ "#" <>
revealSlash <>
writerIdentifierPrefix opts <> "fn" <> ref)
! A.class_ "footnote-ref"
@@ -1760,3 +1765,9 @@ removeLinks = walk go
where
go (Link attr ils _) = Span attr ils
go x = x
+
+toURI :: Bool -> Text -> Text
+toURI isHtml5 t = if isHtml5 then t else escapeURI t
+ where
+ escapeURI = T.pack . escapeURIString (not . needsEscaping) . T.unpack
+ needsEscaping c = isSpace c || T.any (== c) "<>|\"{}[]^`" || not (isAscii c)