Skip to content

Commit 02e6ee3

Browse files
committed
Fix lazy-write of tars
1 parent 21b96b5 commit 02e6ee3

File tree

6 files changed

+214
-495
lines changed

6 files changed

+214
-495
lines changed

pkg/dockerbuild/tarcopy.go

Lines changed: 25 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -265,7 +265,7 @@ func (tc *tarCopier) MkdirAll(dstPath ImagePath, mode fs.FileMode) (err error) {
265265
header := &tar.Header{
266266
Typeflag: tar.TypeDir,
267267
ModTime: modTime,
268-
Name: (dstPath + "/").String(), // from [archive/tar.FileInfoHeader]
268+
Name: tarHeaderName(dstPath, true),
269269
Mode: int64(mode.Perm()),
270270
}
271271
tc.entries = append(tc.entries, &tarEntry{
@@ -297,10 +297,8 @@ func (tc *tarCopier) CopyFile(dstPath ImagePath, srcPath HostPath, fi fs.FileInf
297297
header.Mode = 0755
298298
}
299299

300-
header.Name = filepath.ToSlash(dstPath.String())
301-
entry := &tarEntry{
302-
header: header,
303-
}
300+
header.Name = tarHeaderName(dstPath, fi.IsDir())
301+
entry := &tarEntry{header: header}
304302
tc.entries = append(tc.entries, entry)
305303

306304
if fi.IsDir() {
@@ -318,7 +316,7 @@ func (tc *tarCopier) CopyFile(dstPath ImagePath, srcPath HostPath, fi fs.FileInf
318316

319317
func (tc *tarCopier) WriteFile(dstPath ImagePath, mode fs.FileMode, data []byte) (err error) {
320318
header := &tar.Header{
321-
Name: filepath.ToSlash(dstPath.String()),
319+
Name: tarHeaderName(dstPath, false),
322320
Typeflag: tar.TypeReg,
323321
Mode: int64(mode.Perm()),
324322
Size: int64(len(data)),
@@ -351,8 +349,9 @@ func (tc *tarCopier) Opener() tarball.Opener {
351349
}
352350
}
353351

354-
var tv tarstream.TarVec
352+
var dvecs []tarstream.Datavec
355353
for _, e := range tc.entries {
354+
log.Info().Str("file", e.header.Name).Interface("header", e.header).Msg("processing file")
356355
// create buffer to write tar header to
357356
buf := new(bytes.Buffer)
358357
tw := tar.NewWriter(buf)
@@ -367,8 +366,7 @@ func (tc *tarCopier) Opener() tarball.Opener {
367366
}
368367

369368
// add the tar header mem buffer to the tarvec
370-
tv.Dvecs = append(tv.Dvecs, memv)
371-
tv.Size += memv.GetSize()
369+
dvecs = append(dvecs, memv)
372370

373371
var dataEntry tarstream.Datavec
374372
if hostPath, ok := e.hostPath.Get(); ok {
@@ -383,30 +381,31 @@ func (tc *tarCopier) Opener() tarball.Opener {
383381

384382
if dataEntry != nil {
385383
// add the file path info to the tarvec
386-
size := dataEntry.GetSize()
387-
tv.Size += size
388-
tv.Dvecs = append(tv.Dvecs, dataEntry)
389-
390-
// tar requires file entries to be padded out to
391-
// 512 byte offset
392-
// if needed, record how much padding is needed
393-
// and add to the tarvec
394-
if size%512 != 0 {
395-
padv := tarstream.PadVec{
396-
Size: 512 - (size % 512),
384+
dvecs = append(dvecs, dataEntry)
385+
386+
// tar requires file entries to be padded out to 512 bytes.
387+
if !e.header.FileInfo().IsDir() {
388+
if size := dataEntry.GetSize(); size%512 != 0 {
389+
padv := tarstream.PadVec{
390+
Size: 512 - (size % 512),
391+
}
392+
dvecs = append(dvecs, padv)
397393
}
398-
399-
tv.Dvecs = append(tv.Dvecs, padv)
400-
tv.Size += padv.GetSize()
401394
}
402395
}
403396
}
404397

405-
tv.ComputeSize()
406-
tv.Pos = 0
407-
398+
tv := tarstream.NewTarVec(dvecs)
408399
return func() (io.ReadCloser, error) {
409400
tv2 := tv.Clone()
410401
return tv2, nil
411402
}
412403
}
404+
405+
func tarHeaderName(p ImagePath, isDir bool) string {
406+
name := strings.TrimPrefix(filepath.ToSlash(p.String()), "/")
407+
if isDir {
408+
name += "/"
409+
}
410+
return name
411+
}

pkg/tarstream/datavec.go

Lines changed: 37 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package tarstream
22

33
import (
4+
"bytes"
45
"io"
56
"os"
67
)
@@ -14,58 +15,47 @@ type MemVec struct {
1415
type PathVec struct {
1516
Path string
1617
Info os.FileInfo
17-
file *os.File
1818
}
1919

2020
// PadVec is a padding (0s) vec type
2121
type PadVec struct {
2222
Size int64
2323
}
2424

25+
type DataReader interface {
26+
io.ReaderAt
27+
io.Closer
28+
}
29+
30+
func nopCloser(r io.ReaderAt) DataReader {
31+
return noopCloser{r}
32+
}
33+
34+
type noopCloser struct {
35+
io.ReaderAt
36+
}
37+
38+
func (n noopCloser) Close() error { return nil }
39+
2540
// Datavec is an interface for all vector types
2641
type Datavec interface {
2742
Clone() Datavec
2843
GetSize() int64
29-
Open() error
30-
Close()
31-
ReadAt(b []byte, off int64) (int, error)
44+
Open() (DataReader, error)
3245
}
3346

3447
// GetSize gets the size of the memory vec
3548
func (m MemVec) GetSize() int64 {
3649
return int64(len(m.Data))
3750
}
3851

39-
// Open opens a memory vec
40-
func (m MemVec) Open() error {
41-
return nil
42-
}
43-
44-
// Close closes the memory vec
45-
func (m MemVec) Close() {
46-
}
47-
4852
func (m MemVec) Clone() Datavec {
4953
return m
5054
}
5155

52-
// ReadAt reads at an offset of a memory vec
53-
func (m MemVec) ReadAt(b []byte, off int64) (int, error) {
54-
var end int64
55-
if int64(len(m.Data))-off > int64(len(b)) {
56-
end = off + int64(len(b))
57-
} else {
58-
end = off + int64(len(m.Data))
59-
}
60-
if end > int64(len(m.Data)) {
61-
end = int64(len(m.Data))
62-
}
63-
64-
n := copy(b, m.Data[off:end])
65-
if n == 0 {
66-
return n, io.EOF
67-
}
68-
return n, nil
56+
// Open opens a memory vec
57+
func (m MemVec) Open() (DataReader, error) {
58+
return nopCloser(bytes.NewReader(m.Data)), nil
6959
}
7060

7161
// GetSize gets the file size of the path vec
@@ -74,40 +64,12 @@ func (p PathVec) GetSize() int64 {
7464
}
7565

7666
// Open opens a file represented by a path vec
77-
func (p *PathVec) Open() error {
78-
var err error
79-
p.file, err = os.Open(p.Path)
80-
if err != nil {
81-
return err
82-
}
83-
84-
return nil
85-
}
86-
87-
// Close closes the file represented by the path vec
88-
func (p *PathVec) Close() {
89-
p.file.Close()
90-
}
91-
92-
// ReadAt reads the file represented by path vec at the given offset
93-
func (p *PathVec) ReadAt(b []byte, off int64) (int, error) {
94-
n, err := p.file.ReadAt(b, off)
95-
if err == io.EOF {
96-
return n, nil
97-
}
98-
if n == 0 {
99-
return n, io.EOF
100-
}
101-
return n, err
67+
func (p *PathVec) Open() (DataReader, error) {
68+
return os.Open(p.Path)
10269
}
10370

10471
func (p *PathVec) Clone() Datavec {
105-
// Clone the path vec by creating a new instance with the same path and info
106-
return &PathVec{
107-
Path: p.Path,
108-
Info: p.Info,
109-
file: nil,
110-
}
72+
return p
11173
}
11274

11375
// GetSize gets the size of the padding vec
@@ -116,27 +78,31 @@ func (p PadVec) GetSize() int64 {
11678
}
11779

11880
// Open opens the padding vec
119-
func (p PadVec) Open() error {
120-
return nil
81+
func (p PadVec) Open() (DataReader, error) {
82+
return padReader{p.Size}, nil
12183
}
12284

123-
// Close closes the padding vec
124-
func (p PadVec) Close() {
85+
func (p PadVec) Clone() Datavec {
86+
return p
12587
}
12688

127-
// ReadAt read the padding vec at a given offset (which is always 0s)
128-
func (p PadVec) ReadAt(b []byte, off int64) (int, error) {
129-
n := min(int(p.Size-off), len(b))
89+
type padReader struct {
90+
size int64
91+
}
13092

131-
if n == 0 {
93+
func (r padReader) ReadAt(b []byte, off int64) (int, error) {
94+
rem := int(r.size - off)
95+
if rem == 0 {
13296
return 0, io.EOF
13397
}
98+
99+
n := min(rem, len(b))
134100
for i := range n {
135101
b[i] = 0
136102
}
137103
return n, nil
138104
}
139105

140-
func (p PadVec) Clone() Datavec {
141-
return p
106+
func (r padReader) Close() error {
107+
return nil
142108
}

0 commit comments

Comments
 (0)