Skip to content

Commit 0dd69f0

Browse files
committed
cmd/photocamera-archiver: make archives self-contained
The IA deals poorly with more than a handful of files.
1 parent 0f70202 commit 0dd69f0

File tree

1 file changed

+125
-88
lines changed

1 file changed

+125
-88
lines changed

cmd/photocamera-archiver/photocamera.go

Lines changed: 125 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,12 @@
11
// Command photocamera-archiver creates an archive of a Static Certificate
22
// Transparency log, by compressing tiles into zip files, each containing a
3-
// subtree 16,777,216 entries wide (65,536 level -1 and 0 tiles, 256 level 1
4-
// tiles, and 1 level 2 tile). The checkpoint, JSON metadata, and level 3+ tiles
5-
// are left uncompressed. The zip files are stored at tile/zip/<N>.zip.
6-
// Unnecessary partial tiles at levels 3+ are also removed.
7-
//
8-
// After running this tool, archive the following files and directories:
9-
//
10-
// - checkpoint
11-
// - log.v3.json
12-
// - tile/zip/
13-
// - tile/3/
14-
// - tile/4/ (if present)
15-
// - issuer/
3+
// subtree of up to 16,777,216 entries (65,536 level -1 and 0 tiles, 256 level 1
4+
// tiles, and 1 level 2 tile). The checkpoint, JSON metadata, issuers, level 3+
5+
// tiles, and partial tiles on the right edge are included in every zip file.
6+
// The zip files are stored at archive/<N>.zip.
167
package main
178

189
import (
19-
"bytes"
2010
"context"
2111
"crypto/x509"
2212
"encoding/json"
@@ -37,6 +27,39 @@ import (
3727
"golang.org/x/mod/sumdb/tlog"
3828
)
3929

30+
const README = `This is an archive of a Certificate Transparency log, stored in the
31+
c2sp.org/static-ct-api@v1.0.0 format, although if this log was originally served
32+
through RFC 6962 APIs, leaves might miss the LeafIndex extension.
33+
34+
The log is split over multiple zip files. Archive <N>.zip contains the tiles
35+
at levels -1 (the data entries), 0, 1, and 2 belonging to subtree
36+
37+
[ 256×256×256×N, min(256×256×256×(N+1), TreeSize) )
38+
39+
In other words, each zip file contains one level 2 tile, and all those below it.
40+
41+
Every zip file also contains the following files:
42+
43+
- README.txt — this file
44+
45+
- checkpoint — a Signed Tree Head in c2sp.org/tlog-checkpoint format
46+
47+
- log.v3.json — a JSON specification of the log, including its log ID, public
48+
key, and original URL
49+
50+
- issuers/* — the X.509 chain issuers for the whole log
51+
52+
- tile/{3,4}/* — higher-level tiles, including the hash of other level 2
53+
tiles not included in this archive
54+
55+
- tile/{0,1,2}/*.p/* — partial tiles on the right edge of the tree, if any,
56+
necessary to compute the tree head
57+
58+
This archive was generated by filippo.io/sunlight/cmd/photocamera-archiver.
59+
`
60+
61+
const archiveWidth = 256 * 256 * 256
62+
4063
func main() {
4164
logger := slog.New(stdlog.Handler)
4265

@@ -47,8 +70,8 @@ func main() {
4770
if err != nil {
4871
fatalError(logger, "failed to open local directory", "err", err)
4972
}
50-
if err := root.MkdirAll("tile/zip", 0o755); err != nil {
51-
fatalError(logger, "failed to create zip directory", "err", err)
73+
if err := root.MkdirAll("archive", 0o755); err != nil {
74+
fatalError(logger, "failed to create archive directory", "err", err)
5275
}
5376
tr := localTileReader{root.FS()}
5477

@@ -88,28 +111,68 @@ func main() {
88111
}
89112
logger.Info("loaded checkpoint", "tree_size", c.N, "root_hash", c.Hash)
90113

114+
var globalTiles []tlog.Tile
115+
for L := 5; L >= 0; L-- {
116+
levelHashes := c.N / int64(1<<(sunlight.TileHeight*L))
117+
if L >= 3 {
118+
// All level 3+ tiles.
119+
for N := int64(0); (N * 256) < levelHashes; N++ {
120+
W := int(min(256, levelHashes-(N*256)))
121+
tile := tlog.Tile{H: sunlight.TileHeight, L: L, N: N, W: W}
122+
globalTiles = append(globalTiles, tile)
123+
}
124+
} else {
125+
// Only the partial tile on the right edge, if any.
126+
if W := int(levelHashes % 256); W != 0 {
127+
N := levelHashes / 256
128+
tile := tlog.Tile{H: sunlight.TileHeight, L: L, N: N, W: W}
129+
globalTiles = append(globalTiles, tile)
130+
}
131+
}
132+
}
133+
91134
hr := torchwood.TileHashReaderWithContext(ctx, c.Tree, tr)
92135

93-
for n := int64(0); n < c.N; n += 256 * 256 * 256 {
94-
i := n / (256 * 256 * 256)
136+
for n := int64(0); n < c.N; n += archiveWidth {
137+
i := n / archiveWidth
95138
if i >= 1000 {
96139
fatalError(logger, "cannot archive more than 1000 zip files")
97140
}
98-
name := fmt.Sprintf("tile/zip/%03d.zip", i)
99-
subtree := min(256*256*256, c.N-n)
141+
name := fmt.Sprintf("archive/%03d.zip", i)
142+
subtree := min(archiveWidth, c.N-n)
100143
logger.Info("processing subtree", "name", name, "start", n, "end", n+subtree)
101144
f, err := root.Create(name)
102145
if err != nil {
103146
fatalError(logger, "failed to create zip file", "name", name, "err", err)
104147
}
105148
w := zip.NewWriter(f)
106-
comment := fmt.Sprintf("%s %03d", c.Origin, i)
149+
comment := fmt.Sprintf("%s archive (%d of %d, generated by photocamera-archiver)",
150+
c.Origin, i+1, (c.N+archiveWidth-1)/archiveWidth)
107151
if err := w.SetComment(comment); err != nil {
108152
fatalError(logger, "failed to set zip comment", "name", name, "err", err)
109153
}
154+
155+
// Store README, checkpoint, and log info. Uncompressed, since they're small.
156+
if err := storeMetadataFile(w, "README.txt", []byte(README)); err != nil {
157+
fatalError(logger, "failed to write README", "name", name, "err", err)
158+
}
159+
if err := storeMetadataFile(w, "checkpoint", checkpointBytes); err != nil {
160+
fatalError(logger, "failed to write checkpoint", "name", name, "err", err)
161+
}
162+
if err := storeMetadataFile(w, "log.v3.json", logBytes); err != nil {
163+
fatalError(logger, "failed to write log info", "name", name, "err", err)
164+
}
165+
166+
// Store high-level and partial tiles.
167+
for _, tile := range globalTiles {
168+
if err := storeTile(w, tile, hr); err != nil {
169+
fatalError(logger, "failed to store tile", "tile", sunlight.TilePath(tile), "err", err)
170+
}
171+
}
172+
173+
// Store sub-tree tiles, starting from higher-level ones.
110174
tiles := tlog.NewTiles(torchwood.TileHeight, n, n+subtree)
111175
pb := progressbar.Default(int64(len(tiles)), name)
112-
// Sort tiles in the zip files so that higher-level tiles come first.
113176
slices.SortStableFunc(tiles, func(a, b tlog.Tile) int {
114177
switch {
115178
case a.L < b.L:
@@ -121,26 +184,13 @@ func main() {
121184
}
122185
})
123186
for _, tile := range tiles {
124-
if tile.L >= 3 {
187+
if tile.L >= 3 || tile.W < sunlight.TileWidth {
188+
// Already stored as part of globalTiles.
125189
pb.Add(1)
126190
continue
127191
}
128-
path := sunlight.TilePath(tile)
129-
// Pull the hashes through TileHashReader instead of reading them
130-
// directly, so that their inclusion in the tree is verified.
131-
data, err := tlog.ReadTileData(tile, hr)
132-
if err != nil {
133-
fatalError(logger, "failed to read tile data", "tile", path, "err", err)
134-
}
135-
zf, err := w.CreateHeader(&zip.FileHeader{
136-
Name: path,
137-
Method: zip.Store, // hashes don't compress!
138-
})
139-
if err != nil {
140-
fatalError(logger, "failed to create zip entry", "tile", path, "err", err)
141-
}
142-
if _, err := zf.Write(data); err != nil {
143-
fatalError(logger, "failed to write zip entry", "tile", path, "err", err)
192+
if err := storeTile(w, tile, hr); err != nil {
193+
fatalError(logger, "failed to store tile", "tile", sunlight.TilePath(tile), "err", err)
144194
}
145195
pb.Add(1)
146196
if err := ctx.Err(); err != nil {
@@ -149,7 +199,8 @@ func main() {
149199
}
150200
pb.Reset()
151201
pb.ChangeMax64((subtree + 255) / 256)
152-
// Store data tiles after the Merkle tree tiles.
202+
203+
// Verify and store data tiles after the Merkle tree tiles.
153204
for _, tile := range tiles {
154205
if tile.L != 0 {
155206
continue
@@ -188,56 +239,42 @@ func main() {
188239
logger.Info("wrote zip file", "name", name)
189240
}
190241

191-
// Delete unnecessary tiles at level 3+, and verify the rest of them.
192-
for L := 3; L <= 5; L++ {
193-
levelDir := fmt.Sprintf("tile/%d", L)
194-
levelMaxSize := c.N >> (sunlight.TileHeight * L)
195-
if levelMaxSize == 0 {
196-
break
197-
}
198-
if err := fs.WalkDir(root.FS(), levelDir, func(path string, d fs.DirEntry, err error) error {
199-
if err != nil {
200-
return err
201-
}
202-
if d.IsDir() {
203-
return nil
204-
}
205-
t, err := sunlight.ParseTilePath(strings.TrimSuffix(path, ".p"))
206-
if err != nil {
207-
return fmt.Errorf("failed to parse tile path %s: %w", path, err)
208-
}
209-
if t.L != L {
210-
return fmt.Errorf("unexpected tile level %d, want %d", t.L, L)
211-
}
212-
size := t.N*sunlight.TileWidth + int64(t.W)
213-
if t.W != sunlight.TileWidth && size < levelMaxSize {
214-
// Partial tile, can be deleted.
215-
logger.Info("removing unnecessary partial tile", "tile", path,
216-
"size", size, "max", levelMaxSize)
217-
if err := os.Remove(path); err != nil {
218-
return fmt.Errorf("failed to remove tile %s: %w", path, err)
219-
}
220-
return nil
221-
}
222-
data, err := root.ReadFile(path)
223-
if err != nil {
224-
return fmt.Errorf("failed to read tile data %s: %w", path, err)
225-
}
226-
exp, err := tlog.ReadTileData(t, hr)
227-
if err != nil {
228-
return fmt.Errorf("failed to read tile data %s: %w", path, err)
229-
}
230-
if !bytes.Equal(data, exp) {
231-
return fmt.Errorf("tile data mismatch for %s", path)
232-
}
233-
logger.Info("verified tile", "tile", path)
234-
return nil
235-
}); err != nil {
236-
fatalError(logger, "failed to walk tile directory", "level", L, "err", err)
237-
}
242+
logger.Info("done")
243+
}
244+
245+
func storeMetadataFile(w *zip.Writer, name string, data []byte) error {
246+
zf, err := w.CreateHeader(&zip.FileHeader{
247+
Name: name,
248+
Method: zip.Store,
249+
})
250+
if err != nil {
251+
return fmt.Errorf("failed to create zip entry %q: %w", name, err)
238252
}
253+
if _, err := zf.Write(data); err != nil {
254+
return fmt.Errorf("failed to write zip entry %q: %w", name, err)
255+
}
256+
return nil
257+
}
239258

240-
logger.Info("done")
259+
func storeTile(w *zip.Writer, tile tlog.Tile, hr tlog.HashReader) error {
260+
path := sunlight.TilePath(tile)
261+
// Pull the hashes through TileHashReader instead of reading them
262+
// directly, so that their inclusion in the tree is verified.
263+
data, err := tlog.ReadTileData(tile, hr)
264+
if err != nil {
265+
return fmt.Errorf("failed to read tile data %q: %w", path, err)
266+
}
267+
zf, err := w.CreateHeader(&zip.FileHeader{
268+
Name: path,
269+
Method: zip.Store, // hashes don't compress!
270+
})
271+
if err != nil {
272+
return fmt.Errorf("failed to create zip entry %q: %w", path, err)
273+
}
274+
if _, err := zf.Write(data); err != nil {
275+
return fmt.Errorf("failed to write zip entry %q: %w", path, err)
276+
}
277+
return nil
241278
}
242279

243280
func verifyTileData(tile tlog.Tile, data []byte, hr tlog.HashReader) error {

0 commit comments

Comments
 (0)