diff options
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/SelfContained.hs | 92 | ||||
| -rw-r--r-- | src/Text/Pandoc/Shared.hs | 2 |
2 files changed, 84 insertions, 10 deletions
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 90aae0371..69f696558 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE TupleSections #-} @@ -17,13 +18,13 @@ the HTML using data URIs. module Text.Pandoc.SelfContained ( makeDataURI, makeSelfContained ) where import Codec.Compression.GZip as Gzip import Control.Applicative ((<|>)) -import Control.Monad.Trans (lift) import Data.ByteString (ByteString) import Data.ByteString.Base64 (encodeBase64) import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy as L import qualified Data.Text as T import Data.Char (isAlphaNum, isAscii) +import Data.Digest.Pure.SHA (sha1, showDigest) import Network.URI (escapeURIString) import System.FilePath (takeDirectory, takeExtension, (</>)) import Text.HTML.TagSoup @@ -32,12 +33,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..), fetchItem, import Text.Pandoc.Logging import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.Shared (renderTags', trim, tshow) +import Text.Pandoc.Shared (renderTags', trim, tshow, safeRead) import Text.Pandoc.URI (isURI) import Text.Pandoc.UTF8 (toString, toText, fromText) import Text.Pandoc.Parsing (ParsecT, runParserT) import qualified Text.Pandoc.Parsing as P import Control.Monad.Except (throwError, catchError) +import Data.Either (lefts, rights) +import Data.Maybe (isNothing) +import qualified Data.Map as M +import Control.Monad.State +-- import Debug.Trace isOk :: Char -> Bool isOk c = isAscii c && isAlphaNum c @@ -60,7 +66,14 @@ isSourceAttribute tagname (x,_) = x == "poster" || x == "data-background-image" -convertTags :: PandocMonad m => [Tag T.Text] -> m [Tag T.Text] +data ConvertState = + ConvertState + { isHtml5 :: Bool + , svgMap :: M.Map T.Text T.Text -- map from hash to id + } deriving (Show) + +convertTags :: PandocMonad m => + [Tag T.Text] -> StateT ConvertState m [Tag T.Text] convertTags [] = return [] convertTags (t@TagOpen{}:ts) | fromAttrib "data-external" t == "1" = (t:) <$> convertTags ts @@ -132,20 +145,76 @@ convertTags (t@(TagOpen tagname as):ts) | any (isSourceAttribute tagname) as = do as' <- mapM processAttribute as + let attrs = rights as' + let svgContents = lefts as' rest <- convertTags ts - return $ TagOpen tagname as' : rest + case svgContents of + [] -> return $ TagOpen tagname attrs : rest + ((hash, tags) : _) -> do + svgmap <- gets svgMap + case M.lookup hash svgmap of + Just svgid -> do + let attrs' = if ("id", svgid) `elem` attrs + then [(k,v) | (k,v) <- attrs, k /= "id"] + else attrs + return $ TagOpen "svg" attrs' : + TagOpen "use" [("href", "#" <> svgid)] : + TagClose "use" : rest + Nothing -> + case dropWhile (not . isTagOpenName "svg") tags of + TagOpen "svg" svgattrs : tags' -> do + let attrs' = combineSvgAttrs svgattrs attrs + let (svgId, attrs'') = -- keep original image id if present + case lookup "id" as of + Just id' -> (id', attrs') + Nothing -> + let newid = "svg_" <> hash + in (newid, ("id", newid) : + filter (\(k,_) -> k /= "id") attrs') + modify $ \st -> st{ svgMap = M.insert hash svgId (svgMap st) } + return $ (TagOpen "svg" attrs'' : tags') ++ rest + _ -> return $ TagOpen tagname attrs : rest where processAttribute (x,y) = if isSourceAttribute tagname (x,y) then do res <- getData (fromAttrib "type" t) y case res of - AlreadyDataURI enc -> return (x, enc) - Fetched (mt,bs) -> return (x, makeDataURI (mt,bs)) - CouldNotFetch _ -> return (x, y) - else return (x,y) + AlreadyDataURI enc -> return $ Right (x, enc) + Fetched ("image/svg+xml", bs) -> do + let hash = T.pack (showDigest (sha1 (L.fromStrict bs))) + return $ Left (hash, parseTags (toText bs)) + Fetched (mt,bs) -> return $ Right (x, makeDataURI (mt,bs)) + CouldNotFetch _ -> return $ Right (x, y) + else return $ Right (x,y) convertTags (t:ts) = (t:) <$> convertTags ts +combineSvgAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)] -> [(T.Text, T.Text)] +combineSvgAttrs svgAttrs imgAttrs = + case (mbViewBox, mbHeight, mbWidth) of + (Nothing, Just h, Just w) -> -- calculate viewBox + combinedAttrs ++ [("viewBox", T.unwords ["0", "0", tshow w, tshow h])] + (Just (llx,lly,urx,ury), Nothing, Nothing) -> -- calculate width, height + combinedAttrs ++ + [ ("width", tshow (floor urx - floor llx :: Int)) + , ("height", tshow (floor ury - floor lly :: Int)) ] + _ -> combinedAttrs + where + combinedAttrs = imgAttrs ++ + [(k,v) | (k,v) <- svgAttrs, isNothing (lookup k imgAttrs)] + parseViewBox t = + case map (safeRead . addZero) $ T.words t of + [Just llx, Just lly, Just urx, Just ury] -> Just (llx, lly, urx, ury) + _ -> Nothing + addZero t = + if "-." `T.isPrefixOf` t + then "-0." <> T.drop 2 t -- safeRead fails on -.33, needs -0.33 + else t + (mbViewBox :: Maybe (Double, Double, Double, Double)) = + lookup "viewBox" svgAttrs >>= parseViewBox + (mbHeight :: Maybe Int) = lookup "height" combinedAttrs >>= safeRead + (mbWidth :: Maybe Int) = lookup "width" combinedAttrs >>= safeRead + cssURLs :: PandocMonad m => FilePath -> ByteString -> m ByteString cssURLs d orig = do @@ -293,5 +362,10 @@ getData mimetype src makeSelfContained :: PandocMonad m => T.Text -> m T.Text makeSelfContained inp = do let tags = parseTags inp - out' <- convertTags tags + let html5 = case tags of + (TagOpen "!DOCTYPE" [("html","")]:_) -> True + _ -> False + let convertState = ConvertState { isHtml5 = html5, + svgMap = mempty } + out' <- evalStateT (convertTags tags) convertState return $ renderTags' out' diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 115e59907..125701a83 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -758,7 +758,7 @@ formatCode attr = B.fromList . walk fmt . B.toList renderTags' :: [Tag T.Text] -> T.Text renderTags' = renderTagsOptions renderOptions{ optMinimize = matchTags ["hr", "br", "img", - "meta", "link", "col"] + "meta", "link", "col", "use"] , optRawTag = matchTags ["script", "style"] } where matchTags tags = flip elem tags . T.toLower |
