aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/SelfContained.hs92
-rw-r--r--src/Text/Pandoc/Shared.hs2
2 files changed, 84 insertions, 10 deletions
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 90aae0371..69f696558 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE TupleSections #-}
@@ -17,13 +18,13 @@ the HTML using data URIs.
module Text.Pandoc.SelfContained ( makeDataURI, makeSelfContained ) where
import Codec.Compression.GZip as Gzip
import Control.Applicative ((<|>))
-import Control.Monad.Trans (lift)
import Data.ByteString (ByteString)
import Data.ByteString.Base64 (encodeBase64)
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy as L
import qualified Data.Text as T
import Data.Char (isAlphaNum, isAscii)
+import Data.Digest.Pure.SHA (sha1, showDigest)
import Network.URI (escapeURIString)
import System.FilePath (takeDirectory, takeExtension, (</>))
import Text.HTML.TagSoup
@@ -32,12 +33,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..), fetchItem,
import Text.Pandoc.Logging
import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.MIME (MimeType)
-import Text.Pandoc.Shared (renderTags', trim, tshow)
+import Text.Pandoc.Shared (renderTags', trim, tshow, safeRead)
import Text.Pandoc.URI (isURI)
import Text.Pandoc.UTF8 (toString, toText, fromText)
import Text.Pandoc.Parsing (ParsecT, runParserT)
import qualified Text.Pandoc.Parsing as P
import Control.Monad.Except (throwError, catchError)
+import Data.Either (lefts, rights)
+import Data.Maybe (isNothing)
+import qualified Data.Map as M
+import Control.Monad.State
+-- import Debug.Trace
isOk :: Char -> Bool
isOk c = isAscii c && isAlphaNum c
@@ -60,7 +66,14 @@ isSourceAttribute tagname (x,_) =
x == "poster" ||
x == "data-background-image"
-convertTags :: PandocMonad m => [Tag T.Text] -> m [Tag T.Text]
+data ConvertState =
+ ConvertState
+ { isHtml5 :: Bool
+ , svgMap :: M.Map T.Text T.Text -- map from hash to id
+ } deriving (Show)
+
+convertTags :: PandocMonad m =>
+ [Tag T.Text] -> StateT ConvertState m [Tag T.Text]
convertTags [] = return []
convertTags (t@TagOpen{}:ts)
| fromAttrib "data-external" t == "1" = (t:) <$> convertTags ts
@@ -132,20 +145,76 @@ convertTags (t@(TagOpen tagname as):ts)
| any (isSourceAttribute tagname) as
= do
as' <- mapM processAttribute as
+ let attrs = rights as'
+ let svgContents = lefts as'
rest <- convertTags ts
- return $ TagOpen tagname as' : rest
+ case svgContents of
+ [] -> return $ TagOpen tagname attrs : rest
+ ((hash, tags) : _) -> do
+ svgmap <- gets svgMap
+ case M.lookup hash svgmap of
+ Just svgid -> do
+ let attrs' = if ("id", svgid) `elem` attrs
+ then [(k,v) | (k,v) <- attrs, k /= "id"]
+ else attrs
+ return $ TagOpen "svg" attrs' :
+ TagOpen "use" [("href", "#" <> svgid)] :
+ TagClose "use" : rest
+ Nothing ->
+ case dropWhile (not . isTagOpenName "svg") tags of
+ TagOpen "svg" svgattrs : tags' -> do
+ let attrs' = combineSvgAttrs svgattrs attrs
+ let (svgId, attrs'') = -- keep original image id if present
+ case lookup "id" as of
+ Just id' -> (id', attrs')
+ Nothing ->
+ let newid = "svg_" <> hash
+ in (newid, ("id", newid) :
+ filter (\(k,_) -> k /= "id") attrs')
+ modify $ \st -> st{ svgMap = M.insert hash svgId (svgMap st) }
+ return $ (TagOpen "svg" attrs'' : tags') ++ rest
+ _ -> return $ TagOpen tagname attrs : rest
where processAttribute (x,y) =
if isSourceAttribute tagname (x,y)
then do
res <- getData (fromAttrib "type" t) y
case res of
- AlreadyDataURI enc -> return (x, enc)
- Fetched (mt,bs) -> return (x, makeDataURI (mt,bs))
- CouldNotFetch _ -> return (x, y)
- else return (x,y)
+ AlreadyDataURI enc -> return $ Right (x, enc)
+ Fetched ("image/svg+xml", bs) -> do
+ let hash = T.pack (showDigest (sha1 (L.fromStrict bs)))
+ return $ Left (hash, parseTags (toText bs))
+ Fetched (mt,bs) -> return $ Right (x, makeDataURI (mt,bs))
+ CouldNotFetch _ -> return $ Right (x, y)
+ else return $ Right (x,y)
convertTags (t:ts) = (t:) <$> convertTags ts
+combineSvgAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)] -> [(T.Text, T.Text)]
+combineSvgAttrs svgAttrs imgAttrs =
+ case (mbViewBox, mbHeight, mbWidth) of
+ (Nothing, Just h, Just w) -> -- calculate viewBox
+ combinedAttrs ++ [("viewBox", T.unwords ["0", "0", tshow w, tshow h])]
+ (Just (llx,lly,urx,ury), Nothing, Nothing) -> -- calculate width, height
+ combinedAttrs ++
+ [ ("width", tshow (floor urx - floor llx :: Int))
+ , ("height", tshow (floor ury - floor lly :: Int)) ]
+ _ -> combinedAttrs
+ where
+ combinedAttrs = imgAttrs ++
+ [(k,v) | (k,v) <- svgAttrs, isNothing (lookup k imgAttrs)]
+ parseViewBox t =
+ case map (safeRead . addZero) $ T.words t of
+ [Just llx, Just lly, Just urx, Just ury] -> Just (llx, lly, urx, ury)
+ _ -> Nothing
+ addZero t =
+ if "-." `T.isPrefixOf` t
+ then "-0." <> T.drop 2 t -- safeRead fails on -.33, needs -0.33
+ else t
+ (mbViewBox :: Maybe (Double, Double, Double, Double)) =
+ lookup "viewBox" svgAttrs >>= parseViewBox
+ (mbHeight :: Maybe Int) = lookup "height" combinedAttrs >>= safeRead
+ (mbWidth :: Maybe Int) = lookup "width" combinedAttrs >>= safeRead
+
cssURLs :: PandocMonad m
=> FilePath -> ByteString -> m ByteString
cssURLs d orig = do
@@ -293,5 +362,10 @@ getData mimetype src
makeSelfContained :: PandocMonad m => T.Text -> m T.Text
makeSelfContained inp = do
let tags = parseTags inp
- out' <- convertTags tags
+ let html5 = case tags of
+ (TagOpen "!DOCTYPE" [("html","")]:_) -> True
+ _ -> False
+ let convertState = ConvertState { isHtml5 = html5,
+ svgMap = mempty }
+ out' <- evalStateT (convertTags tags) convertState
return $ renderTags' out'
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 115e59907..125701a83 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -758,7 +758,7 @@ formatCode attr = B.fromList . walk fmt . B.toList
renderTags' :: [Tag T.Text] -> T.Text
renderTags' = renderTagsOptions
renderOptions{ optMinimize = matchTags ["hr", "br", "img",
- "meta", "link", "col"]
+ "meta", "link", "col", "use"]
, optRawTag = matchTags ["script", "style"] }
where matchTags tags = flip elem tags . T.toLower