diff options
| author | John MacFarlane <[email protected]> | 2023-07-10 09:28:48 -0700 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2023-07-11 10:59:33 -0700 |
| commit | 94832af98a9e3e3e4ea0f88bc2bb29981df85e74 (patch) | |
| tree | d755692783aad8387531c01ec3aa3abfc12ecd7b /src/Text | |
| parent | 3496109d49bdf2de5d28435c04ef849b00d9e421 (diff) | |
SelfContained: Use inline svg instead of data uris...
for SVG images in HTML5. Closes #8948.
Note that SelfContained does not have access to the writer
name, so we check for HTML5 by determining whether the document
starts with `<DOCTYPE! html>`. This means that inline SVG
won't be used when generating document fragments.
An API change could be contemplated to give more flexibility,
but this is okay for now.
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/SelfContained.hs | 92 | ||||
| -rw-r--r-- | src/Text/Pandoc/Shared.hs | 2 |
2 files changed, 84 insertions, 10 deletions
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 90aae0371..69f696558 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE TupleSections #-} @@ -17,13 +18,13 @@ the HTML using data URIs. module Text.Pandoc.SelfContained ( makeDataURI, makeSelfContained ) where import Codec.Compression.GZip as Gzip import Control.Applicative ((<|>)) -import Control.Monad.Trans (lift) import Data.ByteString (ByteString) import Data.ByteString.Base64 (encodeBase64) import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy as L import qualified Data.Text as T import Data.Char (isAlphaNum, isAscii) +import Data.Digest.Pure.SHA (sha1, showDigest) import Network.URI (escapeURIString) import System.FilePath (takeDirectory, takeExtension, (</>)) import Text.HTML.TagSoup @@ -32,12 +33,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..), fetchItem, import Text.Pandoc.Logging import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.Shared (renderTags', trim, tshow) +import Text.Pandoc.Shared (renderTags', trim, tshow, safeRead) import Text.Pandoc.URI (isURI) import Text.Pandoc.UTF8 (toString, toText, fromText) import Text.Pandoc.Parsing (ParsecT, runParserT) import qualified Text.Pandoc.Parsing as P import Control.Monad.Except (throwError, catchError) +import Data.Either (lefts, rights) +import Data.Maybe (isNothing) +import qualified Data.Map as M +import Control.Monad.State +-- import Debug.Trace isOk :: Char -> Bool isOk c = isAscii c && isAlphaNum c @@ -60,7 +66,14 @@ isSourceAttribute tagname (x,_) = x == "poster" || x == "data-background-image" -convertTags :: PandocMonad m => [Tag T.Text] -> m [Tag T.Text] +data ConvertState = + ConvertState + { isHtml5 :: Bool + , svgMap :: M.Map T.Text T.Text -- map from hash to id + } deriving (Show) + +convertTags :: PandocMonad m => + [Tag T.Text] -> StateT ConvertState m [Tag T.Text] convertTags [] = return [] convertTags (t@TagOpen{}:ts) | fromAttrib "data-external" t == "1" = (t:) <$> convertTags ts @@ -132,20 +145,76 @@ convertTags (t@(TagOpen tagname as):ts) | any (isSourceAttribute tagname) as = do as' <- mapM processAttribute as + let attrs = rights as' + let svgContents = lefts as' rest <- convertTags ts - return $ TagOpen tagname as' : rest + case svgContents of + [] -> return $ TagOpen tagname attrs : rest + ((hash, tags) : _) -> do + svgmap <- gets svgMap + case M.lookup hash svgmap of + Just svgid -> do + let attrs' = if ("id", svgid) `elem` attrs + then [(k,v) | (k,v) <- attrs, k /= "id"] + else attrs + return $ TagOpen "svg" attrs' : + TagOpen "use" [("href", "#" <> svgid)] : + TagClose "use" : rest + Nothing -> + case dropWhile (not . isTagOpenName "svg") tags of + TagOpen "svg" svgattrs : tags' -> do + let attrs' = combineSvgAttrs svgattrs attrs + let (svgId, attrs'') = -- keep original image id if present + case lookup "id" as of + Just id' -> (id', attrs') + Nothing -> + let newid = "svg_" <> hash + in (newid, ("id", newid) : + filter (\(k,_) -> k /= "id") attrs') + modify $ \st -> st{ svgMap = M.insert hash svgId (svgMap st) } + return $ (TagOpen "svg" attrs'' : tags') ++ rest + _ -> return $ TagOpen tagname attrs : rest where processAttribute (x,y) = if isSourceAttribute tagname (x,y) then do res <- getData (fromAttrib "type" t) y case res of - AlreadyDataURI enc -> return (x, enc) - Fetched (mt,bs) -> return (x, makeDataURI (mt,bs)) - CouldNotFetch _ -> return (x, y) - else return (x,y) + AlreadyDataURI enc -> return $ Right (x, enc) + Fetched ("image/svg+xml", bs) -> do + let hash = T.pack (showDigest (sha1 (L.fromStrict bs))) + return $ Left (hash, parseTags (toText bs)) + Fetched (mt,bs) -> return $ Right (x, makeDataURI (mt,bs)) + CouldNotFetch _ -> return $ Right (x, y) + else return $ Right (x,y) convertTags (t:ts) = (t:) <$> convertTags ts +combineSvgAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)] -> [(T.Text, T.Text)] +combineSvgAttrs svgAttrs imgAttrs = + case (mbViewBox, mbHeight, mbWidth) of + (Nothing, Just h, Just w) -> -- calculate viewBox + combinedAttrs ++ [("viewBox", T.unwords ["0", "0", tshow w, tshow h])] + (Just (llx,lly,urx,ury), Nothing, Nothing) -> -- calculate width, height + combinedAttrs ++ + [ ("width", tshow (floor urx - floor llx :: Int)) + , ("height", tshow (floor ury - floor lly :: Int)) ] + _ -> combinedAttrs + where + combinedAttrs = imgAttrs ++ + [(k,v) | (k,v) <- svgAttrs, isNothing (lookup k imgAttrs)] + parseViewBox t = + case map (safeRead . addZero) $ T.words t of + [Just llx, Just lly, Just urx, Just ury] -> Just (llx, lly, urx, ury) + _ -> Nothing + addZero t = + if "-." `T.isPrefixOf` t + then "-0." <> T.drop 2 t -- safeRead fails on -.33, needs -0.33 + else t + (mbViewBox :: Maybe (Double, Double, Double, Double)) = + lookup "viewBox" svgAttrs >>= parseViewBox + (mbHeight :: Maybe Int) = lookup "height" combinedAttrs >>= safeRead + (mbWidth :: Maybe Int) = lookup "width" combinedAttrs >>= safeRead + cssURLs :: PandocMonad m => FilePath -> ByteString -> m ByteString cssURLs d orig = do @@ -293,5 +362,10 @@ getData mimetype src makeSelfContained :: PandocMonad m => T.Text -> m T.Text makeSelfContained inp = do let tags = parseTags inp - out' <- convertTags tags + let html5 = case tags of + (TagOpen "!DOCTYPE" [("html","")]:_) -> True + _ -> False + let convertState = ConvertState { isHtml5 = html5, + svgMap = mempty } + out' <- evalStateT (convertTags tags) convertState return $ renderTags' out' diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 115e59907..125701a83 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -758,7 +758,7 @@ formatCode attr = B.fromList . walk fmt . B.toList renderTags' :: [Tag T.Text] -> T.Text renderTags' = renderTagsOptions renderOptions{ optMinimize = matchTags ["hr", "br", "img", - "meta", "link", "col"] + "meta", "link", "col", "use"] , optRawTag = matchTags ["script", "style"] } where matchTags tags = flip elem tags . T.toLower |
