aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2023-07-10 09:28:48 -0700
committerJohn MacFarlane <[email protected]>2023-07-11 10:59:33 -0700
commit94832af98a9e3e3e4ea0f88bc2bb29981df85e74 (patch)
treed755692783aad8387531c01ec3aa3abfc12ecd7b
parent3496109d49bdf2de5d28435c04ef849b00d9e421 (diff)
SelfContained: Use inline svg instead of data uris...
for SVG images in HTML5. Closes #8948. Note that SelfContained does not have access to the writer name, so we check for HTML5 by determining whether the document starts with `<DOCTYPE! html>`. This means that inline SVG won't be used when generating document fragments. An API change could be contemplated to give more flexibility, but this is okay for now.
-rw-r--r--pandoc.cabal5
-rw-r--r--src/Text/Pandoc/SelfContained.hs92
-rw-r--r--src/Text/Pandoc/Shared.hs2
-rw-r--r--test/command/8948.md21
-rw-r--r--test/command/minimal.svg5
5 files changed, 111 insertions, 14 deletions
diff --git a/pandoc.cabal b/pandoc.cabal
index a9b2e0b20..07128c781 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -206,6 +206,7 @@ extra-source-files:
test/*.native
test/command/*.md
test/command/*.csl
+ test/command/*.svg
test/command/biblio.bib
test/command/averroes.bib
test/command/A.txt
@@ -235,10 +236,6 @@ extra-source-files:
test/command/5700-metadata-file-1.yml
test/command/5700-metadata-file-2.yml
test/command/abbrevs
- test/command/SVG_logo-without-xml-declaration.svg
- test/command/SVG_logo.svg
- test/command/corrupt.svg
- test/command/inkscape-cube.svg
test/command/sub-file-chapter-1.tex
test/command/sub-file-chapter-2.tex
test/command/bar.tex
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 90aae0371..69f696558 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TupleSections #-}
@@ -17,13 +18,13 @@ the HTML using data URIs.
module Text.Pandoc.SelfContained ( makeDataURI, makeSelfContained ) where
import Codec.Compression.GZip as Gzip
import Control.Applicative ((<|>))
-import Control.Monad.Trans (lift)
import Data.ByteString (ByteString)
import Data.ByteString.Base64 (encodeBase64)
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy as L
import qualified Data.Text as T
import Data.Char (isAlphaNum, isAscii)
+import Data.Digest.Pure.SHA (sha1, showDigest)
import Network.URI (escapeURIString)
import System.FilePath (takeDirectory, takeExtension, (</>))
import Text.HTML.TagSoup
@@ -32,12 +33,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..), fetchItem,
import Text.Pandoc.Logging
import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.MIME (MimeType)
-import Text.Pandoc.Shared (renderTags', trim, tshow)
+import Text.Pandoc.Shared (renderTags', trim, tshow, safeRead)
import Text.Pandoc.URI (isURI)
import Text.Pandoc.UTF8 (toString, toText, fromText)
import Text.Pandoc.Parsing (ParsecT, runParserT)
import qualified Text.Pandoc.Parsing as P
import Control.Monad.Except (throwError, catchError)
+import Data.Either (lefts, rights)
+import Data.Maybe (isNothing)
+import qualified Data.Map as M
+import Control.Monad.State
+-- import Debug.Trace
isOk :: Char -> Bool
isOk c = isAscii c && isAlphaNum c
@@ -60,7 +66,14 @@ isSourceAttribute tagname (x,_) =
x == "poster" ||
x == "data-background-image"
-convertTags :: PandocMonad m => [Tag T.Text] -> m [Tag T.Text]
+data ConvertState =
+ ConvertState
+ { isHtml5 :: Bool
+ , svgMap :: M.Map T.Text T.Text -- map from hash to id
+ } deriving (Show)
+
+convertTags :: PandocMonad m =>
+ [Tag T.Text] -> StateT ConvertState m [Tag T.Text]
convertTags [] = return []
convertTags (t@TagOpen{}:ts)
| fromAttrib "data-external" t == "1" = (t:) <$> convertTags ts
@@ -132,20 +145,76 @@ convertTags (t@(TagOpen tagname as):ts)
| any (isSourceAttribute tagname) as
= do
as' <- mapM processAttribute as
+ let attrs = rights as'
+ let svgContents = lefts as'
rest <- convertTags ts
- return $ TagOpen tagname as' : rest
+ case svgContents of
+ [] -> return $ TagOpen tagname attrs : rest
+ ((hash, tags) : _) -> do
+ svgmap <- gets svgMap
+ case M.lookup hash svgmap of
+ Just svgid -> do
+ let attrs' = if ("id", svgid) `elem` attrs
+ then [(k,v) | (k,v) <- attrs, k /= "id"]
+ else attrs
+ return $ TagOpen "svg" attrs' :
+ TagOpen "use" [("href", "#" <> svgid)] :
+ TagClose "use" : rest
+ Nothing ->
+ case dropWhile (not . isTagOpenName "svg") tags of
+ TagOpen "svg" svgattrs : tags' -> do
+ let attrs' = combineSvgAttrs svgattrs attrs
+ let (svgId, attrs'') = -- keep original image id if present
+ case lookup "id" as of
+ Just id' -> (id', attrs')
+ Nothing ->
+ let newid = "svg_" <> hash
+ in (newid, ("id", newid) :
+ filter (\(k,_) -> k /= "id") attrs')
+ modify $ \st -> st{ svgMap = M.insert hash svgId (svgMap st) }
+ return $ (TagOpen "svg" attrs'' : tags') ++ rest
+ _ -> return $ TagOpen tagname attrs : rest
where processAttribute (x,y) =
if isSourceAttribute tagname (x,y)
then do
res <- getData (fromAttrib "type" t) y
case res of
- AlreadyDataURI enc -> return (x, enc)
- Fetched (mt,bs) -> return (x, makeDataURI (mt,bs))
- CouldNotFetch _ -> return (x, y)
- else return (x,y)
+ AlreadyDataURI enc -> return $ Right (x, enc)
+ Fetched ("image/svg+xml", bs) -> do
+ let hash = T.pack (showDigest (sha1 (L.fromStrict bs)))
+ return $ Left (hash, parseTags (toText bs))
+ Fetched (mt,bs) -> return $ Right (x, makeDataURI (mt,bs))
+ CouldNotFetch _ -> return $ Right (x, y)
+ else return $ Right (x,y)
convertTags (t:ts) = (t:) <$> convertTags ts
+combineSvgAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)] -> [(T.Text, T.Text)]
+combineSvgAttrs svgAttrs imgAttrs =
+ case (mbViewBox, mbHeight, mbWidth) of
+ (Nothing, Just h, Just w) -> -- calculate viewBox
+ combinedAttrs ++ [("viewBox", T.unwords ["0", "0", tshow w, tshow h])]
+ (Just (llx,lly,urx,ury), Nothing, Nothing) -> -- calculate width, height
+ combinedAttrs ++
+ [ ("width", tshow (floor urx - floor llx :: Int))
+ , ("height", tshow (floor ury - floor lly :: Int)) ]
+ _ -> combinedAttrs
+ where
+ combinedAttrs = imgAttrs ++
+ [(k,v) | (k,v) <- svgAttrs, isNothing (lookup k imgAttrs)]
+ parseViewBox t =
+ case map (safeRead . addZero) $ T.words t of
+ [Just llx, Just lly, Just urx, Just ury] -> Just (llx, lly, urx, ury)
+ _ -> Nothing
+ addZero t =
+ if "-." `T.isPrefixOf` t
+ then "-0." <> T.drop 2 t -- safeRead fails on -.33, needs -0.33
+ else t
+ (mbViewBox :: Maybe (Double, Double, Double, Double)) =
+ lookup "viewBox" svgAttrs >>= parseViewBox
+ (mbHeight :: Maybe Int) = lookup "height" combinedAttrs >>= safeRead
+ (mbWidth :: Maybe Int) = lookup "width" combinedAttrs >>= safeRead
+
cssURLs :: PandocMonad m
=> FilePath -> ByteString -> m ByteString
cssURLs d orig = do
@@ -293,5 +362,10 @@ getData mimetype src
makeSelfContained :: PandocMonad m => T.Text -> m T.Text
makeSelfContained inp = do
let tags = parseTags inp
- out' <- convertTags tags
+ let html5 = case tags of
+ (TagOpen "!DOCTYPE" [("html","")]:_) -> True
+ _ -> False
+ let convertState = ConvertState { isHtml5 = html5,
+ svgMap = mempty }
+ out' <- evalStateT (convertTags tags) convertState
return $ renderTags' out'
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 115e59907..125701a83 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -758,7 +758,7 @@ formatCode attr = B.fromList . walk fmt . B.toList
renderTags' :: [Tag T.Text] -> T.Text
renderTags' = renderTagsOptions
renderOptions{ optMinimize = matchTags ["hr", "br", "img",
- "meta", "link", "col"]
+ "meta", "link", "col", "use"]
, optRawTag = matchTags ["script", "style"] }
where matchTags tags = flip elem tags . T.toLower
diff --git a/test/command/8948.md b/test/command/8948.md
new file mode 100644
index 000000000..26e5ed7ea
--- /dev/null
+++ b/test/command/8948.md
@@ -0,0 +1,21 @@
+```
+% pandoc --embed-resources
+![minimal](command/minimal.svg)
+![minimal](command/minimal.svg)
+^D
+<p><svg alt="minimal"><use href="#svg_7868854ffb8f30209cd098bb6207b390b001249e" /></img> <svg id="svg_7868854ffb8f30209cd098bb6207b390b001249e" alt="minimal" viewBox="-.333 -.333 480 150" style="background-color:#ffffff00" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" width="481" height="151">
+ <path d="M 0 35.5 L 6.5 22.5 L 16 37 L 23 24 L 34.8 43.7 L 42.5 30 L 50.3 47 L 59.7 27.7 L 69 47 L 85 17.7 L 98.3 39 L 113 9.7 L 127.7 42.3 L 136.3 23.7 L 147 44.3 L 158.3 20.3 L 170.3 40.3 L 177.7 25.7 L 189.7 43 L 199.7 21 L 207.7 35 L 219 11 L 233 37 L 240.3 23.7 L 251 43 L 263 18.3 L 272.7 33.3 L 283 10 L 295 32.3 L 301.3 23 L 311.7 37 L 323.7 7.7 L 339.3 39 L 346.3 25.7 L 356.3 42.3 L 369.7 15 L 376.3 25.7 L 384 9 L 393 28.3 L 400.3 19 L 411.7 38.3 L 421 21 L 434.3 43 L 445 25 L 453 36.3 L 464.3 18.3 L 476.2 40.3 L 480 33.5 L 480 215 L 0 215 L 0 35.5 Z" fill="#175720"></path>
+</svg>
+</img></p>
+```
+
+```
+% pandoc --embed-resources
+![minimal](command/minimal.svg)
+![minimal](command/minimal.svg){#foo}
+^D
+<p><svg alt="minimal"><use href="#foo" /></img> <svg id="foo" alt="minimal" viewBox="-.333 -.333 480 150" style="background-color:#ffffff00" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" width="481" height="151">
+ <path d="M 0 35.5 L 6.5 22.5 L 16 37 L 23 24 L 34.8 43.7 L 42.5 30 L 50.3 47 L 59.7 27.7 L 69 47 L 85 17.7 L 98.3 39 L 113 9.7 L 127.7 42.3 L 136.3 23.7 L 147 44.3 L 158.3 20.3 L 170.3 40.3 L 177.7 25.7 L 189.7 43 L 199.7 21 L 207.7 35 L 219 11 L 233 37 L 240.3 23.7 L 251 43 L 263 18.3 L 272.7 33.3 L 283 10 L 295 32.3 L 301.3 23 L 311.7 37 L 323.7 7.7 L 339.3 39 L 346.3 25.7 L 356.3 42.3 L 369.7 15 L 376.3 25.7 L 384 9 L 393 28.3 L 400.3 19 L 411.7 38.3 L 421 21 L 434.3 43 L 445 25 L 453 36.3 L 464.3 18.3 L 476.2 40.3 L 480 33.5 L 480 215 L 0 215 L 0 35.5 Z" fill="#175720"></path>
+</svg>
+</img></p>
+```
diff --git a/test/command/minimal.svg b/test/command/minimal.svg
new file mode 100644
index 000000000..ed146f27f
--- /dev/null
+++ b/test/command/minimal.svg
@@ -0,0 +1,5 @@
+<?xml version="1.0" standalone="no"?>
+
+<svg viewBox="-.333 -.333 480 150" style="background-color:#ffffff00" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve">
+ <path d="M 0 35.5 L 6.5 22.5 L 16 37 L 23 24 L 34.8 43.7 L 42.5 30 L 50.3 47 L 59.7 27.7 L 69 47 L 85 17.7 L 98.3 39 L 113 9.7 L 127.7 42.3 L 136.3 23.7 L 147 44.3 L 158.3 20.3 L 170.3 40.3 L 177.7 25.7 L 189.7 43 L 199.7 21 L 207.7 35 L 219 11 L 233 37 L 240.3 23.7 L 251 43 L 263 18.3 L 272.7 33.3 L 283 10 L 295 32.3 L 301.3 23 L 311.7 37 L 323.7 7.7 L 339.3 39 L 346.3 25.7 L 356.3 42.3 L 369.7 15 L 376.3 25.7 L 384 9 L 393 28.3 L 400.3 19 L 411.7 38.3 L 421 21 L 434.3 43 L 445 25 L 453 36.3 L 464.3 18.3 L 476.2 40.3 L 480 33.5 L 480 215 L 0 215 L 0 35.5 Z" fill="#175720"/>
+</svg>