aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/MediaBag.hs
blob: 1a4ea2ba1da8bc2d6f065bde6057fb2b64ffcb0d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
{-# LANGUAGE CPP                        #-}
{-# LANGUAGE OverloadedStrings          #-}
{-# LANGUAGE DeriveDataTypeable         #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{- |
   Module      : Text.Pandoc.MediaBag
   Copyright   : Copyright (C) 2014-2015, 2017-2024 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <[email protected]>
   Stability   : alpha
   Portability : portable

Definition of a MediaBag object to hold binary resources, and an
interface for interacting with it.
-}
module Text.Pandoc.MediaBag (
                     MediaItem(..),
                     MediaBag,
                     deleteMedia,
                     lookupMedia,
                     insertMedia,
                     mediaDirectory,
                     mediaItems
                     ) where
import Crypto.Hash (hashWith, SHA1(SHA1))
import qualified Data.ByteString.Lazy as BL
import Data.Data (Data)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isNothing)
import Data.Typeable (Typeable)
import System.FilePath
import qualified System.FilePath.Posix as Posix
import qualified System.FilePath.Windows as Windows
import Text.Pandoc.MIME (MimeType, getMimeTypeDef, extensionFromMimeType)
import Data.Text (Text)
import qualified Data.Text as T
import Network.URI (URI (..), isURI, parseURI, unEscapeString)
import Data.List (isInfixOf)

data MediaItem =
  MediaItem
  { mediaMimeType :: MimeType
  , mediaPath :: FilePath
  , mediaContents :: BL.ByteString
  } deriving (Eq, Ord, Show, Data, Typeable)

-- | A container for a collection of binary resources, with names and
-- mime types.  Note that a 'MediaBag' is a Monoid, so 'mempty'
-- can be used for an empty 'MediaBag', and '<>' can be used to append
-- two 'MediaBag's.
newtype MediaBag = MediaBag (M.Map Text MediaItem)
        deriving (Semigroup, Monoid, Data, Typeable)

instance Show MediaBag where
  show bag = "MediaBag " ++ show (mediaDirectory bag)

-- | We represent paths with /, in normalized form.  Percent-encoding
-- is not resolved.
canonicalize :: FilePath -> Text
-- avoid an expensive call to isURI for data URIs:
canonicalize fp@('d':'a':'t':'a':':':_) = T.pack fp
canonicalize fp
  | isURI fp = T.pack fp
  | otherwise = T.replace "\\" "/" . T.pack . normalise $ fp

-- | Delete a media item from a 'MediaBag', or do nothing if no item corresponds
-- to the given path.
deleteMedia :: FilePath       -- ^ relative path and canonical name of resource
            -> MediaBag
            -> MediaBag
deleteMedia fp (MediaBag mediamap) =
  MediaBag $ M.delete (canonicalize fp) mediamap

-- | Insert a media item into a 'MediaBag', replacing any existing
-- value with the same name.
insertMedia :: FilePath       -- ^ relative path and canonical name of resource
            -> Maybe MimeType -- ^ mime type (Nothing = determine from extension)
            -> BL.ByteString  -- ^ contents of resource
            -> MediaBag
            -> MediaBag
insertMedia fp mbMime contents (MediaBag mediamap)
 | 'd':'a':'t':'a':':':_ <- fp
 , Just mt' <- mbMime
   = MediaBag (M.insert fp'
               MediaItem{ mediaPath = hashpath
                        , mediaContents = contents
                        , mediaMimeType = mt' } mediamap)
 | otherwise = MediaBag (M.insert fp' mediaItem mediamap)
 where
  mediaItem = MediaItem{ mediaPath = newpath
                       , mediaContents = contents
                       , mediaMimeType = mt }
  fp' = canonicalize fp
  fp'' = unEscapeString $ T.unpack fp'
  uri = parseURI fp
  hashpath = show (hashWith SHA1 (BL.toStrict contents)) <> ext
  newpath = if Posix.isRelative fp''
                 && Windows.isRelative fp''
                 && isNothing uri
                 && not (".." `isInfixOf` fp'')
                 && '%' `notElem` fp''
               then fp''
               else hashpath
  fallback = case takeExtension fp'' of
                  ".gz" -> getMimeTypeDef $ dropExtension fp''
                  _     -> getMimeTypeDef fp''
  mt = fromMaybe fallback mbMime
  path = maybe fp'' (unEscapeString . uriPath) uri
  ext = case extensionFromMimeType mt of
             Just e -> '.':T.unpack e
             Nothing -> case takeExtension path of
                             '.':e | '%' `notElem` e -> '.':e
                             _ -> ""

-- | Lookup a media item in a 'MediaBag', returning mime type and contents.
lookupMedia :: FilePath
            -> MediaBag
            -> Maybe MediaItem
lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap

-- | Get a list of the file paths stored in a 'MediaBag', with
-- their corresponding mime types and the lengths in bytes of the contents.
mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)]
mediaDirectory mediabag =
  map (\(fp, mt, bs) -> (fp, mt, fromIntegral (BL.length bs)))
    (mediaItems mediabag)

mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)]
mediaItems (MediaBag mediamap) =
  map (\item -> (mediaPath item, mediaMimeType item, mediaContents item))
      (M.elems mediamap)