aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <[email protected]>2025-07-28 23:13:09 -0700
committerJohn MacFarlane <[email protected]>2025-07-28 23:17:50 -0700
commit39a2fd43082a95a4fba5b0aa75411698df0ef22e (patch)
tree7070347605e33c083ef9d7fa6fac290c07d21dc7
parent6893b38a6e9e634c51d6dc789db03824feedb616 (diff)
Text.Pandoc.Class: Add `stManager` to CommonState.
[API change] This allows us to cache the HTTP client manager and reuse it for many requests, instead of creating it again (an expensive operation) for each request. (The documentation for `newManager` advises sharing a single manager between requests.) This fixes a memory leak and performance issue in files with a large number of remote images. Closes #10997.
-rw-r--r--src/Text/Pandoc/Class/CommonState.hs5
-rw-r--r--src/Text/Pandoc/Class/IO.hs59
2 files changed, 40 insertions, 24 deletions
diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs
index 4e04b5add..e3c4efce9 100644
--- a/src/Text/Pandoc/Class/CommonState.hs
+++ b/src/Text/Pandoc/Class/CommonState.hs
@@ -23,6 +23,7 @@ import Text.Collate.Lang (Lang)
import Text.Pandoc.MediaBag (MediaBag)
import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING))
import Text.Pandoc.Translations.Types (Translations)
+import Network.HTTP.Client (Manager)
-- | 'CommonState' represents state that is used by all
-- instances of 'PandocMonad'. Normally users should not
@@ -50,6 +51,9 @@ data CommonState = CommonState
, stResourcePath :: [FilePath]
-- ^ Path to search for resources like
-- included images
+ , stManager :: Maybe Manager
+ -- ^ Manager for HTTP client; this needs to persist across many requests
+ -- for efficiency.
, stVerbosity :: Verbosity
-- ^ Verbosity level
, stTrace :: Bool
@@ -75,6 +79,7 @@ defaultCommonState = CommonState
, stInputFiles = []
, stOutputFile = Nothing
, stResourcePath = ["."]
+ , stManager = Nothing
, stVerbosity = WARNING
, stTrace = False
}
diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs
index 2d337d942..de76bdc76 100644
--- a/src/Text/Pandoc/Class/IO.hs
+++ b/src/Text/Pandoc/Class/IO.hs
@@ -45,7 +45,7 @@ import Network.Connection (TLSSettings(..))
import qualified Network.TLS as TLS
import qualified Network.TLS.Extra as TLS
import Network.HTTP.Client
- (httpLbs, responseBody, responseHeaders,
+ (httpLbs, Manager, responseBody, responseHeaders,
Request(port, host, requestHeaders), parseRequest, newManager)
import Network.HTTP.Client.Internal (addProxy)
import Network.HTTP.Client.TLS (mkManagerSettings)
@@ -61,7 +61,8 @@ import System.IO.Error
import System.Random (StdGen)
import Text.Pandoc.Class.CommonState (CommonState (..))
import Text.Pandoc.Class.PandocMonad
- (PandocMonad, getsCommonState, getMediaBag, report, extractURIData)
+ (PandocMonad, getsCommonState, modifyCommonState,
+ getMediaBag, report, extractURIData)
import Text.Pandoc.Definition (Pandoc, Inline (Image))
import Text.Pandoc.Error (PandocError (..))
import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage)
@@ -70,7 +71,6 @@ import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaItems)
import Text.Pandoc.Walk (walk)
import qualified Control.Exception as E
import qualified Data.ByteString as B
-import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as BL
import qualified Data.CaseInsensitive as CI
import qualified Data.Text as T
@@ -124,6 +124,36 @@ newStdGen = liftIO System.Random.newStdGen
newUniqueHash :: MonadIO m => m Int
newUniqueHash = hashUnique <$> liftIO Data.Unique.newUnique
+getManager :: (PandocMonad m, MonadIO m) => m Manager
+getManager = do
+ mbManager <- getsCommonState stManager
+ disableCertificateValidation <- getsCommonState stNoCheckCertificate
+ case mbManager of
+ Just manager -> pure manager
+ Nothing -> do
+ manager <- liftIO $ do
+ certificateStore <- getSystemCertificateStore
+ let tlsSettings = TLSSettings $
+ (TLS.defaultParamsClient "localhost.localdomain" "80")
+ { TLS.clientSupported = def{ TLS.supportedCiphers =
+ TLS.ciphersuite_default
+ , TLS.supportedExtendedMainSecret =
+ TLS.AllowEMS }
+ , TLS.clientShared = def
+ { TLS.sharedCAStore = certificateStore
+ , TLS.sharedValidationCache =
+ if disableCertificateValidation
+ then TLS.ValidationCache
+ (\_ _ _ -> return TLS.ValidationCachePass)
+ (\_ _ _ -> return ())
+ else def
+ }
+ }
+ let tlsManagerSettings = mkManagerSettings tlsSettings Nothing
+ newManager tlsManagerSettings
+ modifyCommonState $ \st -> st{ stManager = Just manager }
+ pure manager
+
openURL :: (PandocMonad m, MonadIO m) => Text -> m (B.ByteString, Maybe MimeType)
openURL u
| Just (URI{ uriScheme = "data:",
@@ -132,8 +162,8 @@ openURL u
| otherwise = do
let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v)
customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders
- disableCertificateValidation <- getsCommonState stNoCheckCertificate
report $ Fetching u
+ manager <- getManager
res <- liftIO $ E.try $ withSocketsDo $ do
proxy <- tryIOError (getEnv "http_proxy")
let addProxy' x = case proxy of
@@ -142,26 +172,7 @@ openURL u
return (addProxy (host r) (port r) x)
req <- parseRequest (unpack u) >>= addProxy'
let req' = req{requestHeaders = customHeaders ++ requestHeaders req}
- certificateStore <- getSystemCertificateStore
- let tlsSettings = TLSSettings $
- (TLS.defaultParamsClient (show $ host req')
- (B8.pack $ show $ port req'))
- { TLS.clientSupported = def{ TLS.supportedCiphers =
- TLS.ciphersuite_default
- , TLS.supportedExtendedMainSecret =
- TLS.AllowEMS }
- , TLS.clientShared = def
- { TLS.sharedCAStore = certificateStore
- , TLS.sharedValidationCache =
- if disableCertificateValidation
- then TLS.ValidationCache
- (\_ _ _ -> return TLS.ValidationCachePass)
- (\_ _ _ -> return ())
- else def
- }
- }
- let tlsManagerSettings = mkManagerSettings tlsSettings Nothing
- resp <- newManager tlsManagerSettings >>= httpLbs req'
+ resp <- httpLbs req' manager
return (B.concat $ toChunks $ responseBody resp,
UTF8.toText `fmap` lookup hContentType (responseHeaders resp))