Skip to content

Make DeflateStream write out headers and footers #118570

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ public partial class DeflateStream : Stream
private volatile bool _activeAsyncOperation;
private bool _wroteBytes;

// Some deflaters (e.g. ZLib) write more than zero bytes for zero byte inputs.
// This round-trips and we should be ok with this, but our legacy managed deflater
// always wrote zero output for zero input and upstack code (e.g. ZipArchiveEntry)
// took dependencies on it. Thus, if we are opened from ZipArchiveEntry, we make
// sure to only "flush" when we actually had some input.
private bool _noFlushOnEmpty;

internal DeflateStream(Stream stream, CompressionMode mode, long uncompressedSize) : this(stream, mode, leaveOpen: false, ZLibNative.Deflate_DefaultWindowBits, uncompressedSize)
{
}
Expand All @@ -46,6 +53,12 @@ public DeflateStream(Stream stream, CompressionLevel compressionLevel, bool leav
{
}

// internal constructor to use from ZipArchiveEntry
internal DeflateStream(Stream stream, CompressionLevel compressionLevel, bool leaveOpen, bool noFlushOnEmpty) : this(stream, compressionLevel, leaveOpen)
{
_noFlushOnEmpty = noFlushOnEmpty;
}

/// <summary>
/// Initializes a new instance of the <see cref="DeflateStream"/> class by using the specified stream, compression options, and optionally leaves the stream open.
/// </summary>
Expand All @@ -62,7 +75,7 @@ internal DeflateStream(Stream stream, ZLibCompressionOptions compressionOptions,
ArgumentNullException.ThrowIfNull(stream);
ArgumentNullException.ThrowIfNull(compressionOptions);

InitializeDeflater(stream, (ZLibNative.CompressionLevel)compressionOptions.CompressionLevel, (CompressionStrategy)compressionOptions.CompressionStrategy, leaveOpen, windowBits);
InitializeDeflater(stream, (ZLibNative.CompressionLevel)compressionOptions.CompressionLevel, (CompressionStrategy)compressionOptions.CompressionStrategy, leaveOpen, windowBits);
}

/// <summary>
Expand Down Expand Up @@ -616,12 +629,7 @@ private void PurgeBuffers(bool disposing)
return;

Debug.Assert(_deflater != null && _buffer != null);
// Some deflaters (e.g. ZLib) write more than zero bytes for zero byte inputs.
// This round-trips and we should be ok with this, but our legacy managed deflater
// always wrote zero output for zero input and upstack code (e.g. ZipArchiveEntry)
// took dependencies on it. Thus, make sure to only "flush" when we actually had
// some input:
if (_wroteBytes)
if (_wroteBytes || !_noFlushOnEmpty)
{
// Compress any bytes left
WriteDeflaterOutput();
Expand Down Expand Up @@ -663,12 +671,7 @@ private async ValueTask PurgeBuffersAsync()
return;

Debug.Assert(_deflater != null && _buffer != null);
// Some deflaters (e.g. ZLib) write more than zero bytes for zero byte inputs.
// This round-trips and we should be ok with this, but our legacy managed deflater
// always wrote zero output for zero input and upstack code (e.g. ZipArchiveEntry)
// took dependencies on it. Thus, make sure to only "flush" when we actually had
// some input.
if (_wroteBytes)
if (_wroteBytes || !_noFlushOnEmpty)
{
// Compress any bytes left
await WriteDeflaterOutputAsync(default).ConfigureAwait(false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,7 @@ private CheckSumAndSizeWriteStream GetDataCompressor(Stream backingStream, bool
case CompressionMethodValues.Deflate:
case CompressionMethodValues.Deflate64:
default:
compressorStream = new DeflateStream(backingStream, _compressionLevel, leaveBackingStreamOpen);
compressorStream = new DeflateStream(backingStream, _compressionLevel, leaveBackingStreamOpen, true);
break;

}
Expand Down Expand Up @@ -975,7 +975,6 @@ private bool WriteLocalFileHeaderInitialize(bool isEmptyFile, bool forceWrite, o
CompressionMethod = CompressionMethodValues.Stored;
compressedSizeTruncated = 0;
uncompressedSizeTruncated = 0;
Debug.Assert(_compressedSize == 0);
Debug.Assert(_uncompressedSize == 0);
Debug.Assert(_crc32 == 0);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,11 +156,11 @@ public void StreamTruncation_IsDetected(TestScenario testScenario)
break;

case TestScenario.Read:
while (ZipFileTestBase.ReadAllBytes(decompressor, buffer, 0, buffer.Length) != 0) { };
while (ZipFileTestBase.ReadAllBytes(decompressor, buffer, 0, buffer.Length) != 0) { }
break;

case TestScenario.ReadAsync:
while (await ZipFileTestBase.ReadAllBytesAsync(decompressor, buffer, 0, buffer.Length) != 0) { };
while (await ZipFileTestBase.ReadAllBytesAsync(decompressor, buffer, 0, buffer.Length) != 0) { }
break;

case TestScenario.ReadByte:
Expand Down Expand Up @@ -219,5 +219,20 @@ public override Task WriteAsync(byte[] buffer, int offset, int count, Cancellati
return base.WriteAsync(buffer, offset, count, cancellationToken);
}
}

[Fact]
public void EmptyDeflateStream_WritesOutput()
{
using (var ms = new MemoryStream())
{
using (var deflateStream = new DeflateStream(ms, CompressionMode.Compress, leaveOpen: true))
{
// Write nothing
}

// DeflateStream should now write output even for empty streams
Assert.True(ms.Length > 0, "Empty DeflateStream should write finalization data");
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ public async Task ConcatenatedEmptyGzipStreams()
[InlineData(10, TestScenario.ReadAsync, 1000, 2000)]
[InlineData(10, TestScenario.Copy, 1000, 2000)]
[InlineData(10, TestScenario.CopyAsync, 1000, 2000)]
[InlineData(2, TestScenario.Copy, 1000, 0x2000-30)]
[InlineData(2, TestScenario.Copy, 1000, 0x2000 - 30)]
[InlineData(2, TestScenario.CopyAsync, 1000, 0x2000 - 30)]
[InlineData(1000, TestScenario.Read, 1, 1)]
[InlineData(1000, TestScenario.ReadAsync, 1, 1)]
Expand Down Expand Up @@ -315,7 +315,7 @@ public void StreamCorruption_IsDetected()
{
Assert.Throws<InvalidDataException>(() =>
{
while (ZipFileTestBase.ReadAllBytes(decompressor, buffer, 0, buffer.Length) != 0);
while (ZipFileTestBase.ReadAllBytes(decompressor, buffer, 0, buffer.Length) != 0) ;
});
}
}
Expand Down Expand Up @@ -377,11 +377,11 @@ public void StreamTruncation_IsDetected(TestScenario testScenario)
break;

case TestScenario.Read:
while (ZipFileTestBase.ReadAllBytes(decompressor, buffer, 0, buffer.Length) != 0) { };
while (ZipFileTestBase.ReadAllBytes(decompressor, buffer, 0, buffer.Length) != 0) { }
break;

case TestScenario.ReadAsync:
while (await ZipFileTestBase.ReadAllBytesAsync(decompressor, buffer, 0, buffer.Length) != 0) { };
while (await ZipFileTestBase.ReadAllBytesAsync(decompressor, buffer, 0, buffer.Length) != 0) { }
break;

case TestScenario.ReadByte:
Expand Down Expand Up @@ -441,5 +441,31 @@ public override Task WriteAsync(byte[] buffer, int offset, int count, Cancellati
return base.WriteAsync(buffer, offset, count, cancellationToken);
}
}

[Fact]
public void EmptyGZipStream_WritesHeaderAndFooter()
{
// Test that an empty GZip stream still writes the required headers and footers
using (var ms = new MemoryStream())
{
using (var gzipStream = new GZipStream(ms, CompressionMode.Compress, leaveOpen: true))
{
// Write nothing
}

// At minimum it should have the GZip signature (0x1f 0x8b) and other required data
Assert.True(ms.Length > 0, "Empty GZip stream should write headers and footers");

// Verify the compressed data can be decompressed successfully
ms.Seek(0, SeekOrigin.Begin);
using (var decompressStream = new GZipStream(ms, CompressionMode.Decompress))
using (var resultStream = new MemoryStream())
{
decompressStream.CopyTo(resultStream);
Assert.Equal(0, resultStream.Length);
}
}
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ public static async Task LargeArchive_DataDescriptor_Read_NonZip64_FileLengthGre
Stream source = await OpenEntryStream(async, e);
byte[] buffer = new byte[s_bufferSize];
int read = await source.ReadAsync(buffer, 0, buffer.Length); // We don't want to inflate this large archive entirely
// just making sure it read successfully
// just making sure it read successfully
Assert.Equal(s_bufferSize, read);
foreach (byte b in buffer)
{
Expand Down Expand Up @@ -564,7 +564,7 @@ public static async Task UpdateZipArchive_AddFileTo_ZipWithCorruptedFile(bool as
byte[] buffer2 = new byte[1024];
file.Seek(0, SeekOrigin.Begin);

while (await s.ReadAsync(buffer1, 0, buffer1.Length) != 0 )
while (await s.ReadAsync(buffer1, 0, buffer1.Length) != 0)
{
await file.ReadAsync(buffer2, 0, buffer2.Length);
Assert.Equal(buffer1, buffer2);
Expand Down Expand Up @@ -820,7 +820,7 @@ public static IEnumerable<object[]> EmptyFiles()
[MemberData(nameof(EmptyFiles))]
public async Task ReadArchive_WithEmptyDeflatedFile(byte[] fileBytes, bool async)
{
using (var testStream = new MemoryStream(fileBytes))
using (var testStream = new MemoryStream(fileBytes.ToArray()))
{
const string ExpectedFileName = "xl/customProperty2.bin";

Expand All @@ -834,7 +834,7 @@ public async Task ReadArchive_WithEmptyDeflatedFile(byte[] fileBytes, bool async
byte[] fileContent = testStream.ToArray();

// compression method should not have changed
Assert.Equal(firstEntryCompressionMethod, fileBytes[8]);
Assert.Equal(firstEntryCompressionMethod, fileContent[8]);

testStream.Seek(0, SeekOrigin.Begin);
// second attempt: open archive with zero-length file that is compressed (Deflate = 0x8)
Expand Down Expand Up @@ -1418,7 +1418,7 @@ public async Task ZipArchive_InvalidExtraFieldData_Async(byte validVersionToExtr
// for first.bin would normally be skipped (because it hasn't changed) but it needs to be rewritten
// because the central directory headers will be rewritten with a valid value and the local file header
// needs to match.
await using (ZipArchive updatedArchive = await ZipArchive.CreateAsync(updatedStream, ZipArchiveMode.Update, leaveOpen: true, entryNameEncoding: null))
await using (ZipArchive updatedArchive = await ZipArchive.CreateAsync(updatedStream, ZipArchiveMode.Update, leaveOpen: true, entryNameEncoding: null))
{
ZipArchiveEntry newEntry = updatedArchive.CreateEntry("second.bin", CompressionLevel.NoCompression);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,62 +92,39 @@ public static async Task EmptyEntryTest(ZipArchiveMode mode, bool async)
string data2 = "more test data written to file.";
DateTimeOffset lastWrite = new DateTimeOffset(1992, 4, 5, 12, 00, 30, new TimeSpan(-5, 0, 0));

var baseline = new LocalMemoryStream();
ZipArchive archive = await CreateZipArchive(async, baseline, mode);

await AddEntry(archive, "data1.txt", data1, lastWrite, async);

ZipArchiveEntry e = archive.CreateEntry("empty.txt");
e.LastWriteTime = lastWrite;

Stream s = await OpenEntryStream(async, e);
await DisposeStream(async, s);

await AddEntry(archive, "data2.txt", data2, lastWrite, async);

await DisposeZipArchive(async, archive);

var test = new LocalMemoryStream();
archive = await CreateZipArchive(async, test, mode);

await AddEntry(archive, "data1.txt", data1, lastWrite, async);

e = archive.CreateEntry("empty.txt");
e.LastWriteTime = lastWrite;

await AddEntry(archive, "data2.txt", data2, lastWrite, async);

await DisposeZipArchive(async, archive);

//compare
Assert.True(ArraysEqual(baseline.ToArray(), test.ToArray()), "Arrays didn't match");

//second test, this time empty file at end
baseline = baseline.Clone();
archive = await CreateZipArchive(async, baseline, mode);
async Task<byte[]> WriteTestArchive(bool openEntryStream, bool emptyEntryAtTheEnd)
{
var archiveStream = new LocalMemoryStream();
ZipArchive archive = await CreateZipArchive(async, archiveStream, mode);

await AddEntry(archive, "data1.txt", data1, lastWrite, async);
await AddEntry(archive, "data1.txt", data1, lastWrite, async);

e = archive.CreateEntry("empty.txt");
e.LastWriteTime = lastWrite;
ZipArchiveEntry e = archive.CreateEntry("empty.txt");
e.LastWriteTime = lastWrite;

s = await OpenEntryStream(async, e);
await DisposeStream(async, s);

await DisposeZipArchive(async, archive);
if (openEntryStream)
{
Stream s = await OpenEntryStream(async, e);
await DisposeStream(async, s);
}

test = test.Clone();
archive = await CreateZipArchive(async, test, mode);
if (!emptyEntryAtTheEnd)
{
await AddEntry(archive, "data2.txt", data2, lastWrite, async);
}

await AddEntry(archive, "data1.txt", data1, lastWrite, async);
await DisposeZipArchive(async, archive);

e = archive.CreateEntry("empty.txt");
e.LastWriteTime = lastWrite;
return archiveStream.ToArray();
}

await DisposeZipArchive(async, archive);
var baseline = await WriteTestArchive(openEntryStream: false, emptyEntryAtTheEnd: false);
var test = await WriteTestArchive(openEntryStream: true, emptyEntryAtTheEnd: false);
Assert.Equal(baseline, test);

//compare
Assert.True(ArraysEqual(baseline.ToArray(), test.ToArray()), "Arrays didn't match after update");
baseline = await WriteTestArchive(openEntryStream: false, emptyEntryAtTheEnd: true);
test = await WriteTestArchive(openEntryStream: true, emptyEntryAtTheEnd: true);
Assert.Equal(baseline, test);
}

[Theory]
Expand Down Expand Up @@ -474,10 +451,10 @@ public async Task Update_PerformMinimalWritesWhenNoFilesChanged(bool async)

public static IEnumerable<object[]> Get_Update_PerformMinimalWritesWhenFixedLengthEntryHeaderFieldChanged_Data()
{
yield return [ 49, 1, 1, ];
yield return [ 40, 3, 2, ];
yield return [ 30, 5, 3, ];
yield return [ 0, 8, 1, ];
yield return [49, 1, 1,];
yield return [40, 3, 2,];
yield return [30, 5, 3,];
yield return [0, 8, 1,];
}

[Theory]
Expand Down Expand Up @@ -657,9 +634,9 @@ public static IEnumerable<object[]> Get_Update_PerformMinimalWritesWhenEntryData

public static IEnumerable<object[]> Get_PerformMinimalWritesWithDataAndHeaderChanges_Data()
{
yield return [ 0, 0 ];
yield return [ 20, 40 ];
yield return [ 30, 10 ];
yield return [0, 0];
yield return [20, 40];
yield return [30, 10];
}

[Theory]
Expand Down Expand Up @@ -914,11 +891,11 @@ public async Task Update_PerformMinimalWritesWhenArchiveCommentChanged_Async()

public static IEnumerable<object[]> Get_Update_PerformMinimalWritesWhenEntriesModifiedAndDeleted_Data()
{
yield return [ -1, 40 ];
yield return [ -1, 49 ];
yield return [ -1, 0 ];
yield return [ 42, 40 ];
yield return [ 38, 40 ];
yield return [-1, 40];
yield return [-1, 49];
yield return [-1, 0];
yield return [42, 40];
yield return [38, 40];
}

[Theory]
Expand Down Expand Up @@ -1109,10 +1086,10 @@ public async Task Update_PerformMinimalWritesWhenEntriesModifiedAndDeleted_Async

public static IEnumerable<object[]> Get_Update_PerformMinimalWritesWhenEntriesModifiedAndAdded_Data()
{
yield return [ 1 ];
yield return [ 5 ];
yield return [ 10 ];
yield return [ 12 ];
yield return [1];
yield return [5];
yield return [10];
yield return [12];
}

[Theory]
Expand Down