Skip to content

Commit 26928ef

Browse files
committed
sstable/colblk: remove deps on BlockDecoder in DataBlockDecoder
This patch removes most of `DataBlockDecoder`'s dependencies on `BlockDecoder`. Instead, `DataBlockDecoder.Init` returns the `BlockDecoder`, and callers pass it explicitly wherever it is needed, such as when constructing the block metadata. Informs: #5157
1 parent 5d018b9 commit 26928ef

File tree

8 files changed

+80
-78
lines changed

8 files changed

+80
-78
lines changed

cockroachkvs/cockroachkvs.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -498,9 +498,9 @@ var KeySchema = colblk.KeySchema{
498498
NewKeyWriter: func() colblk.KeyWriter {
499499
return makeCockroachKeyWriter()
500500
},
501-
InitKeySeekerMetadata: func(meta *colblk.KeySeekerMetadata, d *colblk.DataBlockDecoder) {
501+
InitKeySeekerMetadata: func(meta *colblk.KeySeekerMetadata, d *colblk.DataBlockDecoder, bd *colblk.BlockDecoder) {
502502
ks := (*cockroachKeySeeker)(unsafe.Pointer(meta))
503-
ks.init(d)
503+
ks.init(d, bd)
504504
},
505505
KeySeeker: func(meta *colblk.KeySeekerMetadata) colblk.KeySeeker {
506506
return (*cockroachKeySeeker)(unsafe.Pointer(meta))
@@ -747,14 +747,14 @@ var _ uint = colblk.KeySeekerMetadataSize - uint(unsafe.Sizeof(cockroachKeySeeke
747747

748748
var _ colblk.KeySeeker = (*cockroachKeySeeker)(nil)
749749

750-
func (ks *cockroachKeySeeker) init(d *colblk.DataBlockDecoder) {
751-
bd := d.BlockDecoder()
750+
func (ks *cockroachKeySeeker) init(d *colblk.DataBlockDecoder, bd *colblk.BlockDecoder) {
752751
ks.roachKeys = bd.PrefixBytes(cockroachColRoachKey)
753752
ks.roachKeyChanged = d.PrefixChanged()
754753
ks.mvccWallTimes = bd.Uints(cockroachColMVCCWallTime)
755754
ks.mvccLogical = bd.Uints(cockroachColMVCCLogical)
756755
ks.untypedVersions = bd.RawBytes(cockroachColUntypedVersion)
757-
header := d.KeySchemaHeader()
756+
header := bd.Header()
757+
header = header[:len(header)-colblk.DataBlockCustomHeaderSize]
758758
if len(header) != 1 {
759759
panic(errors.AssertionFailedf("invalid key schema-specific header %x", header))
760760
}

cockroachkvs/cockroachkvs_bench_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,8 +318,8 @@ func benchmarkCockroachDataColBlockIter(
318318
it.InitOnce(&KeySchema, &Comparer, getInternalValuer(func([]byte) base.InternalValue {
319319
return base.MakeInPlaceValue([]byte("mock external value"))
320320
}))
321-
decoder.Init(&KeySchema, serializedBlock)
322-
if err := it.Init(&decoder, transforms); err != nil {
321+
bd := decoder.Init(&KeySchema, serializedBlock)
322+
if err := it.Init(&decoder, bd, transforms); err != nil {
323323
b.Fatal(err)
324324
}
325325
avgRowSize := float64(len(serializedBlock)) / float64(len(keys))

cockroachkvs/cockroachkvs_test.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -204,13 +204,14 @@ func TestKeySchema_KeySeeker(t *testing.T) {
204204
var buf bytes.Buffer
205205
var enc colblk.DataBlockEncoder
206206
var dec colblk.DataBlockDecoder
207+
var bd *colblk.BlockDecoder
207208
var ks colblk.KeySeeker
208209
var maxKeyLen int
209210
enc.Init(&KeySchema)
210211

211212
initKeySeeker := func() {
212213
ksPointer := &cockroachKeySeeker{}
213-
KeySchema.InitKeySeekerMetadata((*colblk.KeySeekerMetadata)(unsafe.Pointer(ksPointer)), &dec)
214+
KeySchema.InitKeySeekerMetadata((*colblk.KeySeekerMetadata)(unsafe.Pointer(ksPointer)), &dec, bd)
214215
ks = KeySchema.KeySeeker((*colblk.KeySeekerMetadata)(unsafe.Pointer(ksPointer)))
215216
}
216217

@@ -234,7 +235,7 @@ func TestKeySchema_KeySeeker(t *testing.T) {
234235
rows++
235236
}
236237
blk, _ := enc.Finish(rows, enc.Size())
237-
dec.Init(&KeySchema, blk)
238+
bd = dec.Init(&KeySchema, blk)
238239
return buf.String()
239240
case "is-lower-bound":
240241
initKeySeeker()
@@ -256,7 +257,7 @@ func TestKeySchema_KeySeeker(t *testing.T) {
256257

257258
fmt.Fprintf(&buf, "SeekGE(%s, boundRow=%d, searchDir=%d) = (row=%d, equalPrefix=%t)",
258259
line, boundRow, searchDir, row, equalPrefix)
259-
if row >= 0 && row < dec.BlockDecoder().Rows() {
260+
if row >= 0 && row < bd.Rows() {
260261
var kiter colblk.PrefixBytesIter
261262
kiter.Buf = make([]byte, maxKeyLen+1)
262263
key := ks.MaterializeUserKey(&kiter, -1, row)
@@ -412,8 +413,8 @@ func testCockroachDataColBlock(t *testing.T, seed uint64, keyCfg KeyGenConfig) {
412413
it.InitOnce(&KeySchema, &Comparer, getInternalValuer(func([]byte) base.InternalValue {
413414
return base.MakeInPlaceValue([]byte("mock external value"))
414415
}))
415-
decoder.Init(&KeySchema, serializedBlock)
416-
if err := it.Init(&decoder, blockiter.Transforms{}); err != nil {
416+
bd := decoder.Init(&KeySchema, serializedBlock)
417+
if err := it.Init(&decoder, bd, blockiter.Transforms{}); err != nil {
417418
t.Fatal(err)
418419
}
419420
// Scan the block using Next and ensure that all the keys values match.

cockroachkvs/key_schema_test.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -66,23 +66,23 @@ func runDataDrivenTest(t *testing.T, path string) {
6666

6767
case "describe":
6868
var d colblk.DataBlockDecoder
69-
d.Init(&KeySchema, blockData)
69+
bd := d.Init(&KeySchema, blockData)
7070
f := binfmt.New(blockData)
7171
tp := treeprinter.New()
72-
d.Describe(f, tp)
72+
d.Describe(f, tp, bd)
7373
return tp.String()
7474

7575
case "suffix-types":
7676
var d colblk.DataBlockDecoder
77-
d.Init(&KeySchema, blockData)
77+
bd := d.Init(&KeySchema, blockData)
7878
var ks cockroachKeySeeker
79-
ks.init(&d)
79+
ks.init(&d, bd)
8080
return fmt.Sprintf("suffix-types: %s", ks.suffixTypes)
8181

8282
case "keys":
8383
var d colblk.DataBlockDecoder
84-
d.Init(&KeySchema, blockData)
85-
require.NoError(t, iter.Init(&d, blockiter.Transforms{}))
84+
bd := d.Init(&KeySchema, blockData)
85+
require.NoError(t, iter.Init(&d, bd, blockiter.Transforms{}))
8686
defer iter.Close()
8787
var buf bytes.Buffer
8888
var prevKey base.InternalKey
@@ -98,8 +98,8 @@ func runDataDrivenTest(t *testing.T, path string) {
9898

9999
case "seek":
100100
var d colblk.DataBlockDecoder
101-
d.Init(&KeySchema, blockData)
102-
require.NoError(t, iter.Init(&d, blockiter.Transforms{}))
101+
bd := d.Init(&KeySchema, blockData)
102+
require.NoError(t, iter.Init(&d, bd, blockiter.Transforms{}))
103103
defer iter.Close()
104104
var buf strings.Builder
105105
for _, l := range crstrings.Lines(td.Input) {
@@ -145,10 +145,10 @@ func TestKeySchema_RandomKeys(t *testing.T) {
145145
blk = crbytes.CopyAligned(blk)
146146

147147
var dec colblk.DataBlockDecoder
148-
dec.Init(&KeySchema, blk)
148+
bd := dec.Init(&KeySchema, blk)
149149
var it colblk.DataBlockIter
150150
it.InitOnce(&KeySchema, &Comparer, nil)
151-
require.NoError(t, it.Init(&dec, blockiter.NoTransforms))
151+
require.NoError(t, it.Init(&dec, bd, blockiter.NoTransforms))
152152
// Ensure that a scan across the block finds all the relevant keys.
153153
var valBuf []byte
154154
for k, kv := 0, it.First(); kv != nil; k, kv = k+1, it.Next() {

sstable/colblk/block.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,11 @@ func (d *BlockDecoder) Data() []byte {
372372
return d.data
373373
}
374374

375+
// Header returns the block's custom header, i.e. the first customHeaderSize bytes of the block data.
376+
func (d *BlockDecoder) Header() []byte {
377+
return d.data[:d.customHeaderSize]
378+
}
379+
375380
func (d *BlockDecoder) pageStart(col int) uint32 {
376381
if uint16(col) >= d.header.Columns {
377382
// -1 for the trailing version byte

sstable/colblk/data_block.go

Lines changed: 39 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ type KeySchema struct {
4444
// InitKeySeekerMetadata initializes the provided KeySeekerMetadata. This
4545
// happens once when a block enters the block cache and can be used to save
4646
// computation in NewKeySeeker.
47-
InitKeySeekerMetadata func(meta *KeySeekerMetadata, d *DataBlockDecoder)
47+
InitKeySeekerMetadata func(meta *KeySeekerMetadata, d *DataBlockDecoder, bd *BlockDecoder)
4848

4949
// KeySeeker returns a KeySeeker using metadata that was previously
5050
// initialized with InitKeySeekerMetadata. The returned key seeker can be an
@@ -215,10 +215,10 @@ func DefaultKeySchema(comparer *base.Comparer, prefixBundleSize int) KeySchema {
215215
kw.suffixes.Init()
216216
return kw
217217
},
218-
InitKeySeekerMetadata: func(meta *KeySeekerMetadata, d *DataBlockDecoder) {
218+
InitKeySeekerMetadata: func(meta *KeySeekerMetadata, d *DataBlockDecoder, bd *BlockDecoder) {
219219
ks := (*defaultKeySeeker)(unsafe.Pointer(&meta[0]))
220220
ks.comparer = comparer
221-
ks.init(d)
221+
ks.init(d, bd)
222222
},
223223
KeySeeker: func(meta *KeySeekerMetadata) KeySeeker {
224224
ks := (*defaultKeySeeker)(unsafe.Pointer(&meta[0]))
@@ -358,10 +358,10 @@ type defaultKeySeeker struct {
358358
sharedPrefix []byte
359359
}
360360

361-
func (ks *defaultKeySeeker) init(d *DataBlockDecoder) {
361+
func (ks *defaultKeySeeker) init(d *DataBlockDecoder, bd *BlockDecoder) {
362362
ks.decoder = d
363-
ks.prefixes = d.d.PrefixBytes(defaultKeySchemaColumnPrefix)
364-
ks.suffixes = d.d.RawBytes(defaultKeySchemaColumnSuffix)
363+
ks.prefixes = bd.PrefixBytes(defaultKeySchemaColumnPrefix)
364+
ks.suffixes = bd.RawBytes(defaultKeySchemaColumnSuffix)
365365
ks.sharedPrefix = ks.prefixes.SharedPrefix()
366366
}
367367

@@ -498,7 +498,7 @@ const (
498498
// grow key buffers while iterating over the block, ensuring that the key buffer
499499
// is always sufficiently large.
500500
// This is serialized immediately after the KeySchema specific header.
501-
const dataBlockCustomHeaderSize = 4
501+
const DataBlockCustomHeaderSize = 4
502502

503503
// Init initializes the data block writer.
504504
func (w *DataBlockEncoder) Init(schema *KeySchema) {
@@ -605,7 +605,7 @@ func (w *DataBlockEncoder) Rows() int {
605605

606606
// Size returns the size of the current pending data block.
607607
func (w *DataBlockEncoder) Size() int {
608-
off := HeaderSize(len(w.Schema.ColumnTypes)+dataBlockColumnMax, dataBlockCustomHeaderSize+w.Schema.HeaderSize)
608+
off := HeaderSize(len(w.Schema.ColumnTypes)+dataBlockColumnMax, DataBlockCustomHeaderSize+w.Schema.HeaderSize)
609609
off = w.KeyWriter.Size(w.rows, off)
610610
off = w.trailers.Size(w.rows, off)
611611
off = w.prefixSame.InvertedSize(w.rows, off)
@@ -646,12 +646,12 @@ func (w *DataBlockEncoder) Finish(rows, size int) (finished []byte, lastKey base
646646
// to represent when the prefix changes.
647647
w.prefixSame.Invert(rows)
648648

649-
w.enc.Init(size, h, dataBlockCustomHeaderSize+w.Schema.HeaderSize)
649+
w.enc.Init(size, h, DataBlockCustomHeaderSize+w.Schema.HeaderSize)
650650

651651
// Write the key schema custom header.
652652
w.KeyWriter.FinishHeader(w.enc.Data()[:w.Schema.HeaderSize])
653653
// Write the max key length in the data block custom header.
654-
binary.LittleEndian.PutUint32(w.enc.Data()[w.Schema.HeaderSize:w.Schema.HeaderSize+dataBlockCustomHeaderSize], uint32(w.maximumKeyLength))
654+
binary.LittleEndian.PutUint32(w.enc.Data()[w.Schema.HeaderSize:w.Schema.HeaderSize+DataBlockCustomHeaderSize], uint32(w.maximumKeyLength))
655655
w.enc.Encode(rows, w.KeyWriter)
656656
w.enc.Encode(rows, &w.trailers)
657657
w.enc.Encode(rows, &w.prefixSame)
@@ -744,12 +744,12 @@ func (rw *DataBlockRewriter) RewriteSuffixes(
744744
// better spent dropping support for the physical rewriting of data blocks
745745
// we're performing here and instead use a read-time IterTransform.
746746

747-
rw.decoder.Init(rw.KeySchema, input)
747+
bd := rw.decoder.Init(rw.KeySchema, input)
748748
meta := &KeySeekerMetadata{}
749-
rw.KeySchema.InitKeySeekerMetadata(meta, &rw.decoder)
749+
rw.KeySchema.InitKeySeekerMetadata(meta, &rw.decoder, bd)
750750
rw.keySeeker = rw.KeySchema.KeySeeker(meta)
751751
rw.encoder.Reset()
752-
if err = rw.iter.Init(&rw.decoder, blockiter.Transforms{}); err != nil {
752+
if err = rw.iter.Init(&rw.decoder, bd, blockiter.Transforms{}); err != nil {
753753
return base.InternalKey{}, base.InternalKey{}, nil, err
754754
}
755755

@@ -786,7 +786,7 @@ func (rw *DataBlockRewriter) RewriteSuffixes(
786786
k := base.InternalKey{UserKey: rw.keyBuf, Trailer: kv.K.Trailer}
787787
rw.encoder.Add(k, value, valuePrefix, kcmp, rw.decoder.isObsolete.At(i))
788788
}
789-
rewritten, end = rw.encoder.Finish(int(rw.decoder.d.header.Rows), rw.encoder.Size())
789+
rewritten, end = rw.encoder.Finish(int(bd.header.Rows), rw.encoder.Size())
790790
end.UserKey, rw.keyAlloc = rw.keyAlloc.Copy(end.UserKey)
791791
return start, end, rewritten, nil
792792
}
@@ -814,8 +814,8 @@ func InitDataBlockMetadata(schema *KeySchema, md *block.Metadata, data []byte) (
814814
err = base.CorruptionErrorf("error initializing data block metadata: %v", r)
815815
}
816816
}()
817-
metadatas.d.Init(schema, data)
818-
schema.InitKeySeekerMetadata(&metadatas.keySchemaMeta, &metadatas.d)
817+
bd := metadatas.d.Init(schema, data)
818+
schema.InitKeySeekerMetadata(&metadatas.keySchemaMeta, &metadatas.d, bd)
819819
return nil
820820
}
821821

@@ -885,38 +885,31 @@ type DataBlockDecoder struct {
885885
maximumKeyLength uint32
886886
}
887887

888-
// BlockDecoder returns a pointer to the underlying BlockDecoder.
889-
func (d *DataBlockDecoder) BlockDecoder() *BlockDecoder {
890-
return &d.d
891-
}
892-
893888
// PrefixChanged returns the prefix-changed bitmap.
894889
func (d *DataBlockDecoder) PrefixChanged() Bitmap {
895890
return d.prefixChanged
896891
}
897892

898-
// KeySchemaHeader returns the KeySchema-specific header.
899-
func (d *DataBlockDecoder) KeySchemaHeader() []byte {
900-
return d.d.data[:d.d.customHeaderSize-dataBlockCustomHeaderSize]
901-
}
902-
903893
// Init initializes the data block reader with the given serialized data block.
904-
func (d *DataBlockDecoder) Init(schema *KeySchema, data []byte) {
894+
func (d *DataBlockDecoder) Init(schema *KeySchema, data []byte) *BlockDecoder {
905895
if uintptr(unsafe.Pointer(unsafe.SliceData(data)))&7 != 0 {
906896
panic("data buffer not 8-byte aligned")
907897
}
908-
d.d.Init(data, dataBlockCustomHeaderSize+schema.HeaderSize)
909-
d.trailers = d.d.Uints(len(schema.ColumnTypes) + dataBlockColumnTrailer)
910-
d.prefixChanged = d.d.Bitmap(len(schema.ColumnTypes) + dataBlockColumnPrefixChanged)
911-
d.values = d.d.RawBytes(len(schema.ColumnTypes) + dataBlockColumnValue)
912-
d.isValueExternal = d.d.Bitmap(len(schema.ColumnTypes) + dataBlockColumnIsValueExternal)
913-
d.isObsolete = d.d.Bitmap(len(schema.ColumnTypes) + dataBlockColumnIsObsolete)
898+
bd := BlockDecoder{}
899+
bd.Init(data, DataBlockCustomHeaderSize+schema.HeaderSize)
900+
d.d = bd
901+
d.trailers = bd.Uints(len(schema.ColumnTypes) + dataBlockColumnTrailer)
902+
d.prefixChanged = bd.Bitmap(len(schema.ColumnTypes) + dataBlockColumnPrefixChanged)
903+
d.values = bd.RawBytes(len(schema.ColumnTypes) + dataBlockColumnValue)
904+
d.isValueExternal = bd.Bitmap(len(schema.ColumnTypes) + dataBlockColumnIsValueExternal)
905+
d.isObsolete = bd.Bitmap(len(schema.ColumnTypes) + dataBlockColumnIsObsolete)
914906
d.maximumKeyLength = binary.LittleEndian.Uint32(data[schema.HeaderSize:])
907+
return &bd
915908
}
916909

917910
// Describe describes the binary format of the data block, assuming f.Offset()
918911
// is positioned at the beginning of the same data block described by d.
919-
func (d *DataBlockDecoder) Describe(f *binfmt.Formatter, tp treeprinter.Node) {
912+
func (d *DataBlockDecoder) Describe(f *binfmt.Formatter, tp treeprinter.Node, bd *BlockDecoder) {
920913
// Set the relative offset. When loaded into memory, the beginning of blocks
921914
// are aligned. Padding that ensures alignment is done relative to the
922915
// current offset. Setting the relative offset ensures that if we're
@@ -926,13 +919,13 @@ func (d *DataBlockDecoder) Describe(f *binfmt.Formatter, tp treeprinter.Node) {
926919
f.SetAnchorOffset()
927920

928921
n := tp.Child("data block header")
929-
if keySchemaHeaderSize := int(d.d.customHeaderSize - 4); keySchemaHeaderSize > 0 {
922+
if keySchemaHeaderSize := int(bd.customHeaderSize - DataBlockCustomHeaderSize); keySchemaHeaderSize > 0 {
930923
f.HexBytesln(keySchemaHeaderSize, "key schema header")
931924
}
932925
f.HexBytesln(4, "maximum key length: %d", d.maximumKeyLength)
933-
d.d.HeaderToBinFormatter(f, n)
934-
for i := 0; i < int(d.d.header.Columns); i++ {
935-
d.d.ColumnToBinFormatter(f, n, i, int(d.d.header.Rows))
926+
bd.HeaderToBinFormatter(f, n)
927+
for i := 0; i < int(bd.header.Columns); i++ {
928+
bd.ColumnToBinFormatter(f, n, i, int(bd.header.Rows))
936929
}
937930
f.HexBytesln(1, "block padding byte")
938931
f.ToTreePrinter(n)
@@ -953,9 +946,9 @@ type DataBlockValidator struct {
953946
func (v *DataBlockValidator) Validate(
954947
data []byte, comparer *base.Comparer, keySchema *KeySchema,
955948
) error {
956-
v.dec.Init(keySchema, data)
957-
n := v.dec.d.header.Rows
958-
keySchema.InitKeySeekerMetadata(&v.keySeekerMeta, &v.dec)
949+
bd := v.dec.Init(keySchema, data)
950+
n := bd.header.Rows
951+
keySchema.InitKeySeekerMetadata(&v.keySeekerMeta, &v.dec, bd)
959952
keySeeker := keySchema.KeySeeker(&v.keySeekerMeta)
960953

961954
if cap(v.prevUserKeyBuf) < int(v.dec.maximumKeyLength)+1 {
@@ -1057,10 +1050,12 @@ func (i *DataBlockIter) InitOnce(
10571050

10581051
// Init initializes the data block iterator, configuring it to read from the
10591052
// provided decoder.
1060-
func (i *DataBlockIter) Init(d *DataBlockDecoder, transforms blockiter.Transforms) error {
1053+
func (i *DataBlockIter) Init(
1054+
d *DataBlockDecoder, bd *BlockDecoder, transforms blockiter.Transforms,
1055+
) error {
10611056
i.d = d
10621057
// Leave i.h unchanged.
1063-
numRows := int(d.d.header.Rows)
1058+
numRows := int(bd.header.Rows)
10641059
i.maxRow = numRows - 1
10651060
i.transforms = transforms
10661061
if i.transforms.HideObsoletePoints && d.isObsolete.SeekSetBitGE(0) == numRows {
@@ -1071,7 +1066,7 @@ func (i *DataBlockIter) Init(d *DataBlockDecoder, transforms blockiter.Transform
10711066

10721067
// TODO(radu): see if this allocation can be a problem for the suffix rewriter.
10731068
meta := &KeySeekerMetadata{}
1074-
i.keySchema.InitKeySeekerMetadata(meta, d)
1069+
i.keySchema.InitKeySeekerMetadata(meta, d, bd)
10751070
i.keySeeker = i.keySchema.KeySeeker(meta)
10761071

10771072
// The worst case is when the largest key in the block has no suffix.

0 commit comments

Comments
 (0)