diff options
| author | Albert Krewinkel <[email protected]> | 2025-08-05 17:25:42 +0200 |
|---|---|---|
| committer | John MacFarlane <[email protected]> | 2025-08-06 12:48:59 -0700 |
| commit | 3a185fb5d008f8ffb1cc1cb03b805a29df24a2c9 (patch) | |
| tree | 70e284fe29acce3a258b3393e2be89f76e0d96cd | |
| parent | 48e59436ec0cb19bd7ee3966d71af0e41d2debc2 (diff) | |
LaTeX writer: set `pdf-trailer-id` if `SOURCE_DATE_EPOCH` envvar is set
The `SOURCE_DATE_EPOCH` environment variable is used to trigger
reproducible PDF compilation, i.e., PDFs that are identical down to the
byte level for repeated runs.
Closes: #6539
| -rw-r--r-- | MANUAL.txt | 15 | ||||
| -rw-r--r-- | data/templates/default.latex | 13 | ||||
| -rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 25 |
3 files changed, 50 insertions, 3 deletions
diff --git a/MANUAL.txt b/MANUAL.txt index 1f9f04362..4bd8c9e14 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -3209,6 +3209,16 @@ These variables function when using BibLaTeX for [citation rendering]. `natbiboptions` : list of options for natbib +#### Other + +`pdf-trailer-id` +: the PDF trailer ID; must be two PDF byte strings if set, + conventionally with 16 bytes each. E.g., + `<00112233445566778899aabbccddeeff> + <00112233445566778899aabbccddeeff>`. + + See the section on [reproducible builds]. + [KOMA-Script]: https://ctan.org/pkg/koma-script [LaTeX Font Catalogue]: https://tug.org/FontCatalogue/ [LaTeX font encodings guide]: https://ctan.org/pkg/encguide @@ -7541,6 +7551,11 @@ be taken from it instead of the current time. `SOURCE_DATE_EPOCH` should contain an integer unix timestamp (specifying the number of seconds since midnight UTC January 1, 1970). +For reproducible builds with LaTeX, you can either specify the +`pdf-trailer-id` in the metadata or leave it undefined, in which +case pandoc will create a trailer-id based on a hash of the +`SOURCE_DATE_EPOCH` and the document's contents. + Some document formats also include a unique identifier. For EPUB, this can be set explicitly by setting the `identifier` metadata field (see [EPUB Metadata], above). diff --git a/data/templates/default.latex b/data/templates/default.latex index 781133776..b008d3db2 100644 --- a/data/templates/default.latex +++ b/data/templates/default.latex @@ -40,6 +40,19 @@ $header-includes$ $endfor$ $after-header-includes.latex()$ $hypersetup.latex()$ +$if(pdf-trailer-id)$ + +\ifXeTeX +\special{pdf:trailerid [ $pdf-trailer-id$ ]} +\fi +\ifPDFTeX +\pdftrailerid{} +\pdftrailer{/ID [ $pdf-trailer-id$ ]} +\fi +\ifLuaTeX +\pdfvariable trailerid {[ $pdf-trailer-id$ ]} +\fi +$endif$ $if(title)$ \title{$title$$if(thanks)$\thanks{$thanks$}$endif$} diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index f9261eb87..b3d545a6b 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -7,7 +7,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.LaTeX - Copyright : Copyright (C) 2006-2024 John MacFarlane + Copyright : Copyright (C) 2006-2025 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <[email protected]> @@ -30,6 +30,7 @@ import Control.Monad liftM, when, unless ) +import Crypto.Hash (hashWith, MD5(MD5)) import Data.Containers.ListUtils (nubOrd) import Data.Char (isDigit, isAscii) import Data.List (intersperse, (\\)) @@ -40,7 +41,8 @@ import qualified Data.Text as T import Network.URI (unEscapeString) import Text.DocTemplates (FromContext(lookupContext), Val(..), renderTemplate) import Text.Collate.Lang (renderLang) -import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) +import Text.Pandoc.Class.PandocMonad (PandocMonad, getPOSIXTime, lookupEnv, + report, toLang) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight, styleToLaTeX) @@ -63,6 +65,7 @@ import Text.Pandoc.Writers.LaTeX.Util (stringToLaTeX, StringContext(..), wrapDiv, hypertarget, labelFor, getListingsLanguage, mbBraced) import Text.Pandoc.Writers.Shared +import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.Pandoc.Writers.AnnotatedTable as Ann -- Work around problems with notes inside emphasis (see #8982) @@ -178,6 +181,19 @@ pandocToLaTeX options (Pandoc meta blocks) = do st <- get titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta + -- The trailer ID is as hash used to identify the PDF. Taking control of its + -- value is important when aiming for reproducible PDF generation. Setting + -- `SOURCE_DATE_EPOCH` is the traditional method used to control + -- reproducible builds. There are no cryptographic requirements for the ID, + -- so the 128bits (16 bytes) of MD5 are appropriate. + reproduciblePDF <- isJust <$> lookupEnv "SOURCE_DATE_EPOCH" + trailerID <- do + time <- getPOSIXTime + let hash = T.pack . show . hashWith MD5 $ mconcat + [ UTF8.fromString $ show time + , UTF8.fromText $ render Nothing main + ] + pure $ mconcat [ "<", hash, "> <", hash, ">" ] -- we need a default here since lang is used in template conditionals let hasStringValue x = isJust (getField x metadata :: Maybe (Doc Text)) let geometryFromMargins = mconcat $ intersperse ("," :: Doc Text) $ @@ -254,7 +270,10 @@ pandocToLaTeX options (Pandoc meta blocks) = do Just (Just ('A', ds)) | not (T.null ds) && T.all isDigit ds -> resetField "papersize" ("a" <> ds) - _ -> id) + _ -> id) . + (if reproduciblePDF + then defField "pdf-trailer-id" trailerID + else id) $ metadata let babelLang = mblang >>= toBabel let context' = |
