Skip to content

Commit 5f3a61b

Browse files
committed
add dedup ratio to information
1 parent 7c397dd commit 5f3a61b

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

packages/hub/src/utils/createXorbs.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ export async function* createXorbs(
3636
event: "file";
3737
path: string;
3838
hash: string;
39+
/** Fraction of file bytes that were deduplicated, from 0 (none) to 1 (all) */
40+
dedupRatio: number;
3941
representation: Array<{
4042
xorbId: number | string; // either xorb id (for local xorbs) or xorb hash (for remote xorbs)
4143
offset: number;
@@ -74,6 +76,7 @@ export async function* createXorbs(
7476
event: "file";
7577
path: string;
7678
hash: string;
79+
dedupRatio: number;
7780
representation: Array<{
7881
xorbId: number | string;
7982
offset: number;
@@ -84,14 +87,15 @@ export async function* createXorbs(
8487
}> = [];
8588

8689
const remoteXorbHashes: string[] = [""]; // starts at index 1 (to simplify implem a bit)
87-
let bytesSinceRemoteDedup = Infinity;
8890

8991
try {
9092
for await (const fileSource of fileSources) {
93+
let bytesSinceRemoteDedup = Infinity;
9194
const sourceChunks: Array<Uint8Array> = [];
9295

9396
const reader = fileSource.content.stream().getReader();
9497
let processedBytes = 0;
98+
let dedupedBytes = 0; // Track bytes that were deduplicated
9599
const fileChunks: Array<{ hash: string; length: number }> = [];
96100
let currentChunkRangeBeginning = 0;
97101
const fileRepresentation: Array<{
@@ -195,6 +199,7 @@ export async function* createXorbs(
195199
chunkXorbId = cacheData.xorbIndex;
196200
chunkOffset = cacheData.offset;
197201
chunkEndOffset = cacheData.endOffset;
202+
dedupedBytes += chunk.length; // Track deduplicated bytes
198203
}
199204
bytesSinceRemoteDedup += chunk.length;
200205

@@ -270,10 +275,13 @@ export async function* createXorbs(
270275
);
271276
}
272277

278+
const dedupRatio = fileSource.content.size > 0 ? dedupedBytes / fileSource.content.size : 0;
279+
273280
pendingFileEvents.push({
274281
event: "file" as const,
275282
path: fileSource.path,
276283
hash: chunkModule.compute_file_hash(fileChunks),
284+
dedupRatio,
277285
representation: fileRepresentation,
278286
});
279287
}

0 commit comments

Comments
 (0)