aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlbert Krewinkel <[email protected]>2025-08-05 17:25:42 +0200
committerJohn MacFarlane <[email protected]>2025-08-06 12:48:59 -0700
commit3a185fb5d008f8ffb1cc1cb03b805a29df24a2c9 (patch)
tree70e284fe29acce3a258b3393e2be89f76e0d96cd
parent48e59436ec0cb19bd7ee3966d71af0e41d2debc2 (diff)
LaTeX writer: set `pdf-trailer-id` if `SOURCE_DATE_EPOCH` envvar is set
The `SOURCE_DATE_EPOCH` environment variable is used to trigger reproducible PDF compilation, i.e., PDFs that are identical down to the byte level for repeated runs. Closes: #6539
-rw-r--r--MANUAL.txt15
-rw-r--r--data/templates/default.latex13
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs25
3 files changed, 50 insertions, 3 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index 1f9f04362..4bd8c9e14 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -3209,6 +3209,16 @@ These variables function when using BibLaTeX for [citation rendering].
`natbiboptions`
: list of options for natbib
+#### Other
+
+`pdf-trailer-id`
+: the PDF trailer ID; must be two PDF byte strings if set,
+ conventionally with 16 bytes each. E.g.,
+ `<00112233445566778899aabbccddeeff>
+ <00112233445566778899aabbccddeeff>`.
+
+ See the section on [reproducible builds].
+
[KOMA-Script]: https://ctan.org/pkg/koma-script
[LaTeX Font Catalogue]: https://tug.org/FontCatalogue/
[LaTeX font encodings guide]: https://ctan.org/pkg/encguide
@@ -7541,6 +7551,11 @@ be taken from it instead of the current time.
`SOURCE_DATE_EPOCH` should contain an integer unix timestamp
(specifying the number of seconds since midnight UTC January 1, 1970).
+For reproducible builds with LaTeX, you can either specify the
+`pdf-trailer-id` in the metadata or leave it undefined, in which
+case pandoc will create a trailer-id based on a hash of the
+`SOURCE_DATE_EPOCH` and the document's contents.
+
Some document formats also include a unique identifier. For
EPUB, this can be set explicitly by setting the `identifier`
metadata field (see [EPUB Metadata], above).
diff --git a/data/templates/default.latex b/data/templates/default.latex
index 781133776..b008d3db2 100644
--- a/data/templates/default.latex
+++ b/data/templates/default.latex
@@ -40,6 +40,19 @@ $header-includes$
$endfor$
$after-header-includes.latex()$
$hypersetup.latex()$
+$if(pdf-trailer-id)$
+
+\ifXeTeX
+\special{pdf:trailerid [ $pdf-trailer-id$ ]}
+\fi
+\ifPDFTeX
+\pdftrailerid{}
+\pdftrailer{/ID [ $pdf-trailer-id$ ]}
+\fi
+\ifLuaTeX
+\pdfvariable trailerid {[ $pdf-trailer-id$ ]}
+\fi
+$endif$
$if(title)$
\title{$title$$if(thanks)$\thanks{$thanks$}$endif$}
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index f9261eb87..b3d545a6b 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -7,7 +7,7 @@
{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Writers.LaTeX
- Copyright : Copyright (C) 2006-2024 John MacFarlane
+ Copyright : Copyright (C) 2006-2025 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <[email protected]>
@@ -30,6 +30,7 @@ import Control.Monad
liftM,
when,
unless )
+import Crypto.Hash (hashWith, MD5(MD5))
import Data.Containers.ListUtils (nubOrd)
import Data.Char (isDigit, isAscii)
import Data.List (intersperse, (\\))
@@ -40,7 +41,8 @@ import qualified Data.Text as T
import Network.URI (unEscapeString)
import Text.DocTemplates (FromContext(lookupContext), Val(..), renderTemplate)
import Text.Collate.Lang (renderLang)
-import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang)
+import Text.Pandoc.Class.PandocMonad (PandocMonad, getPOSIXTime, lookupEnv,
+ report, toLang)
import Text.Pandoc.Definition
import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight,
styleToLaTeX)
@@ -63,6 +65,7 @@ import Text.Pandoc.Writers.LaTeX.Util (stringToLaTeX, StringContext(..),
wrapDiv, hypertarget, labelFor,
getListingsLanguage, mbBraced)
import Text.Pandoc.Writers.Shared
+import qualified Text.Pandoc.UTF8 as UTF8
import qualified Text.Pandoc.Writers.AnnotatedTable as Ann
-- Work around problems with notes inside emphasis (see #8982)
@@ -178,6 +181,19 @@ pandocToLaTeX options (Pandoc meta blocks) = do
st <- get
titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta
authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta
+ -- The trailer ID is as hash used to identify the PDF. Taking control of its
+ -- value is important when aiming for reproducible PDF generation. Setting
+ -- `SOURCE_DATE_EPOCH` is the traditional method used to control
+ -- reproducible builds. There are no cryptographic requirements for the ID,
+ -- so the 128bits (16 bytes) of MD5 are appropriate.
+ reproduciblePDF <- isJust <$> lookupEnv "SOURCE_DATE_EPOCH"
+ trailerID <- do
+ time <- getPOSIXTime
+ let hash = T.pack . show . hashWith MD5 $ mconcat
+ [ UTF8.fromString $ show time
+ , UTF8.fromText $ render Nothing main
+ ]
+ pure $ mconcat [ "<", hash, "> <", hash, ">" ]
-- we need a default here since lang is used in template conditionals
let hasStringValue x = isJust (getField x metadata :: Maybe (Doc Text))
let geometryFromMargins = mconcat $ intersperse ("," :: Doc Text) $
@@ -254,7 +270,10 @@ pandocToLaTeX options (Pandoc meta blocks) = do
Just (Just ('A', ds))
| not (T.null ds) && T.all isDigit ds
-> resetField "papersize" ("a" <> ds)
- _ -> id)
+ _ -> id) .
+ (if reproduciblePDF
+ then defField "pdf-trailer-id" trailerID
+ else id) $
metadata
let babelLang = mblang >>= toBabel
let context' =