Skip to content

Commit 72ee608

Browse files
committed
HPCC-34657 Hybrid index improve how fpos are stored
- Moved file positions to the end of the record. - Optionally removed file positions if they will always be zero. Signed-off-by: James McMullan [email protected]
1 parent a8c1070 commit 72ee608

File tree

4 files changed

+72
-28
lines changed

4 files changed

+72
-28
lines changed

system/jhtree/hlzw.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ void KeyCompressor::openBlob(void *blk,int blksize)
9999
method = comp->getCompressionMethod();
100100
}
101101

102-
int KeyCompressor::writekey(offset_t fPtr, const char *key, unsigned datalength)
102+
int KeyCompressor::writekey(offset_t fPtr, const char *key, unsigned datalength, unsigned options)
103103
{
104104
assert(!isBlob);
105105
assertex(__BYTE_ORDER == __LITTLE_ENDIAN); // otherwise the following code is wrong.
@@ -111,8 +111,14 @@ int KeyCompressor::writekey(offset_t fPtr, const char *key, unsigned datalength)
111111
KEYRECSIZE_T rs = datalength;
112112
tempKeyBuffer.appendSwap(sizeof(rs), &rs);
113113
}
114-
tempKeyBuffer.appendSwap(sizeof(offset_t), &fPtr);
114+
115+
bool hasTrailingFilePos = (options & TrailingFilePosition) != 0 && (options & NoFilePosition) == 0;
116+
bool hasLeadingFilePos = (options & NoFilePosition) == 0 && !hasTrailingFilePos;
117+
if (hasLeadingFilePos)
118+
tempKeyBuffer.appendSwap(sizeof(offset_t), &fPtr);
115119
tempKeyBuffer.append(datalength, key);
120+
if (hasTrailingFilePos)
121+
tempKeyBuffer.appendSwap(sizeof(offset_t), &fPtr);
116122

117123
size32_t toWrite = tempKeyBuffer.length();
118124
if (comp->write(tempKeyBuffer.bufferBase(),toWrite)!=toWrite)

system/jhtree/hlzw.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,20 @@ typedef unsigned short KEYRECSIZE_T;
2727
class KeyCompressor final
2828
{
2929
public:
30+
enum KeyWriteOptions
31+
{
32+
DefaultWriteOptions= 0x00,
33+
TrailingFilePosition = 0x01,
34+
NoFilePosition = 0x02
35+
};
36+
3037
KeyCompressor() {}
3138
~KeyCompressor();
3239
void open(void *blk,int blksize, bool isVariable, bool rowcompression, size32_t fixedRowSize);
3340
void open(void *blk,int blksize, ICompressHandler * compressionHandler, const char * options, bool _isVariable, size32_t fixedRowSize);
3441
void open(void *blk,int blksize, ICompressor * compressor, bool _isVariable, size32_t _fixedRowSize);
3542

36-
int writekey(offset_t fPtr, const char *key, unsigned datalength);
43+
int writekey(offset_t fPtr, const char *key, unsigned datalength, unsigned writeOptions = DefaultWriteOptions);
3744
bool write(const void * data, size32_t datalength);
3845

3946
bool compressBlock(size32_t destSize, void * dest, size32_t srcSize, const void * src, ICompressHandler * compressionHandler, const char * options, bool isVariable, size32_t fixedSize);

system/jhtree/jhblockcompressed.cpp

Lines changed: 54 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,7 @@ void CJHBlockCompressedSearchNode::load(CKeyHdr *_keyHdr, const void *rawData, o
110110

111111
keyLen = keyHdr->getMaxKeyLength();
112112
keyCompareLen = _keyHdr->getNodeKeyLength();
113-
keyRecLen = keyLen + sizeof(offset_t);
114-
113+
115114
const char *keys = ((const char *) rawData) + sizeof(hdr);
116115

117116
firstSequence = *(unsigned __int64 *) keys;
@@ -120,6 +119,11 @@ void CJHBlockCompressedSearchNode::load(CKeyHdr *_keyHdr, const void *rawData, o
120119

121120
CompressionMethod compressionMethod = *(CompressionMethod*) keys;
122121
keys += sizeof(CompressionMethod);
122+
123+
hasFilePosition = *(bool*) keys;
124+
keys += sizeof(bool);
125+
126+
keyRecLen = hasFilePosition ? (keyLen + sizeof(offset_t)) : keyLen;
123127

124128
CCycleTimer expansionTimer(true);
125129
keyBuf = expandBlock(keys, inMemorySize, compressionMethod);
@@ -128,7 +132,7 @@ void CJHBlockCompressedSearchNode::load(CKeyHdr *_keyHdr, const void *rawData, o
128132

129133
int CJHBlockCompressedSearchNode::compareValueAt(const char *src, unsigned int index) const
130134
{
131-
return memcmp(src, keyBuf + index*keyRecLen + (keyHdr->hasSpecialFileposition() ? sizeof(offset_t) : 0), keyCompareLen);
135+
return memcmp(src, keyBuf + index*keyRecLen, keyCompareLen);
132136
}
133137

134138
bool CJHBlockCompressedSearchNode::fetchPayload(unsigned int index, char *dst, PayloadReference & activePayload) const
@@ -139,11 +143,14 @@ bool CJHBlockCompressedSearchNode::fetchPayload(unsigned int index, char *dst, P
139143
const char * p = keyBuf + index*keyRecLen;
140144
if (keyHdr->hasSpecialFileposition())
141145
{
142-
//It would make sense to have the fileposition at the start of the row from the perspective of the
143-
//internal representation, but that would complicate everything else which assumes the keyed
144-
//fields start at the beginning of the row.
145-
memcpy(dst+keyCompareLen, p+keyCompareLen+sizeof(offset_t), keyLen-keyCompareLen);
146-
memcpy(dst+keyLen, p, sizeof(offset_t));
146+
memcpy(dst+keyCompareLen, p+keyCompareLen, keyLen-keyCompareLen);
147+
if (hasFilePosition)
148+
memcpy(dst+keyLen, p+keyLen, sizeof(offset_t));
149+
else
150+
{
151+
offset_t zeroPos = 0;
152+
memcpy(dst+keyLen, &zeroPos, sizeof(offset_t));
153+
}
147154
}
148155
else
149156
{
@@ -158,8 +165,6 @@ bool CJHBlockCompressedSearchNode::getKeyAt(unsigned int index, char *dst) const
158165
if (dst)
159166
{
160167
const char * p = keyBuf + index*keyRecLen;
161-
if (keyHdr->hasSpecialFileposition())
162-
p += sizeof(offset_t);
163168
memcpy(dst, p, keyCompareLen);
164169
}
165170
return true;
@@ -168,17 +173,23 @@ bool CJHBlockCompressedSearchNode::getKeyAt(unsigned int index, char *dst) const
168173
size32_t CJHBlockCompressedSearchNode::getSizeAt(unsigned int index) const
169174
{
170175
if (keyHdr->hasSpecialFileposition())
171-
return keyLen + sizeof(offset_t);
176+
{
177+
if (hasFilePosition)
178+
return keyLen + sizeof(offset_t);
179+
else
180+
return keyLen;
181+
}
172182
else
173183
return keyLen;
174184
}
175185

176186
offset_t CJHBlockCompressedSearchNode::getFPosAt(unsigned int index) const
177187
{
178188
if (index >= hdr.numKeys) return 0;
189+
if (!hasFilePosition) return 0;
179190

180191
offset_t pos;
181-
const char * p = keyBuf + index*keyRecLen;
192+
const char * p = keyBuf + index*keyRecLen + keyLen;
182193
memcpy( &pos, p, sizeof(__int64));
183194
_WINREV(pos);
184195
return pos;
@@ -252,16 +263,23 @@ bool CBlockCompressedWriteNode::add(offset_t pos, const void *indata, size32_t i
252263
memcpy(keyPtr, &context.compressionMethod, sizeof(context.compressionMethod));
253264
keyPtr += sizeof(context.compressionMethod);
254265
hdr.keyBytes += sizeof(context.compressionMethod);
266+
267+
bool hasFilepos = !context.zeroFilePos;
268+
memcpy(keyPtr, &hasFilepos, sizeof(bool));
269+
keyPtr += sizeof(bool);
270+
hdr.keyBytes += sizeof(bool);
255271

256-
//Adjust the fixed key size to include the fileposition field which is written by writekey.
272+
//Adjust the fixed key size to include the fileposition field (when one is stored) which is written by writekey
257273
bool isVariable = keyHdr->isVariable();
258-
size32_t fixedKeySize = isVariable ? 0 : keyLen + sizeof(offset_t);
274+
size32_t fixedKeySize = isVariable ? 0 : (hasFilepos ? keyLen + sizeof(offset_t) : keyLen);
259275

260276
ICompressHandler * handler = queryCompressHandler(context.compressionMethod);
261277
compressor.open(keyPtr, maxBytes-hdr.keyBytes, handler, context.compressionOptions, isVariable, fixedKeySize);
262278
}
263279

264-
if (0xffff == hdr.numKeys || 0 == compressor.writekey(pos, (const char *)indata, insize))
280+
unsigned writeOptions = KeyCompressor::TrailingFilePosition | (context.zeroFilePos ? KeyCompressor::NoFilePosition : 0);
281+
int written = compressor.writekey(pos, (const char *)indata, insize, writeOptions);
282+
if (0xffff == hdr.numKeys || written == 0)
265283
return false;
266284

267285
if (insize>keyLen)
@@ -278,7 +296,7 @@ void CBlockCompressedWriteNode::finalize()
278296
{
279297
compressor.close();
280298
if (hdr.numKeys)
281-
hdr.keyBytes = compressor.buflen() + sizeof(unsigned __int64) + sizeof(CompressionMethod); // rsequence
299+
hdr.keyBytes = compressor.buflen() + sizeof(unsigned __int64) + sizeof(CompressionMethod) + sizeof(bool); // rsequence + compressionMethod + hasFilePosition
282300
}
283301

284302
BlockCompressedIndexCompressor::BlockCompressedIndexCompressor(unsigned keyedSize, IHThorIndexWriteArg *helper, const char* options)
@@ -310,6 +328,9 @@ BlockCompressedIndexCompressor::BlockCompressedIndexCompressor(unsigned keyedSiz
310328
context.compressionHandler = queryCompressHandler(compressionMethod);
311329
if (!context.compressionHandler)
312330
throw MakeStringException(0, "Unknown compression method %d", (int)compressionMethod);
331+
332+
if (helper && (helper->getFlags() & TIWzerofilepos))
333+
context.zeroFilePos = true;
313334
}
314335

315336
CJHBlockCompressedVarNode::CJHBlockCompressedVarNode() {}
@@ -329,13 +350,15 @@ void CJHBlockCompressedVarNode::load(CKeyHdr *_keyHdr, const void *rawData, offs
329350
recArray[i] = finger + sizeof(KEYRECSIZE_T);
330351
KEYRECSIZE_T recsize = *(KEYRECSIZE_T *)finger;
331352
_WINREV(recsize);
332-
finger += recsize + sizeof(KEYRECSIZE_T) + sizeof(offset_t);
353+
finger += recsize + sizeof(KEYRECSIZE_T);
354+
if (hasFilePosition)
355+
finger += sizeof(offset_t);
333356
}
334357
}
335358

336359
int CJHBlockCompressedVarNode::compareValueAt(const char *src, unsigned int index) const
337360
{
338-
return memcmp(src, recArray[index] + (keyHdr->hasSpecialFileposition() ? sizeof(offset_t) : 0), keyCompareLen);
361+
return memcmp(src, recArray[index], keyCompareLen);
339362
}
340363

341364
bool CJHBlockCompressedVarNode::fetchPayload(unsigned int num, char *dst, PayloadReference & activePayload) const
@@ -349,8 +372,14 @@ bool CJHBlockCompressedVarNode::fetchPayload(unsigned int num, char *dst, Payloa
349372
_WINREV(reclen);
350373
if (keyHdr->hasSpecialFileposition())
351374
{
352-
memcpy(dst+keyCompareLen, p+keyCompareLen+sizeof(offset_t), reclen-keyCompareLen);
353-
memcpy(dst+reclen, p, sizeof(offset_t));
375+
memcpy(dst+keyCompareLen, p+keyCompareLen, reclen-keyCompareLen);
376+
if (hasFilePosition)
377+
memcpy(dst+reclen, p+reclen, sizeof(offset_t));
378+
else
379+
{
380+
offset_t zeroPos = 0;
381+
memcpy(dst+reclen, &zeroPos, sizeof(offset_t));
382+
}
354383
}
355384
else
356385
memcpy(dst+keyCompareLen, p+keyCompareLen, reclen-keyCompareLen);
@@ -368,10 +397,7 @@ bool CJHBlockCompressedVarNode::getKeyAt(unsigned int num, char *dst) const
368397
KEYRECSIZE_T reclen = ((KEYRECSIZE_T *) p)[-1];
369398
_WINREV(reclen);
370399
assertex(reclen >= keyCompareLen);
371-
if (keyHdr->hasSpecialFileposition())
372-
memcpy(dst, p + sizeof(offset_t), keyCompareLen);
373-
else
374-
memcpy(dst, p, keyCompareLen);
400+
memcpy(dst, p, keyCompareLen);
375401
}
376402
return true;
377403
}
@@ -390,10 +416,13 @@ size32_t CJHBlockCompressedVarNode::getSizeAt(unsigned int num) const
390416
offset_t CJHBlockCompressedVarNode::getFPosAt(unsigned int num) const
391417
{
392418
if (num >= hdr.numKeys) return 0;
419+
if (!hasFilePosition) return 0;
393420

394421
const char * p = recArray[num];
422+
KEYRECSIZE_T reclen = ((KEYRECSIZE_T *) p)[-1];
423+
_WINREV(reclen);
395424
offset_t pos;
396-
memcpy( &pos, p, sizeof(__int64) );
425+
memcpy( &pos, p + reclen, sizeof(__int64) );
397426
_WINREV(pos);
398427
return pos;
399428
}

system/jhtree/jhblockcompressed.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ class CJHBlockCompressedSearchNode : public CJHSearchNode
3333
size32_t keyLen = 0;
3434
size32_t keyCompareLen = 0;
3535
size32_t keyRecLen = 0;
36+
bool hasFilePosition = true;
3637

3738
unsigned __int64 firstSequence = 0;
3839

@@ -76,6 +77,7 @@ struct CBlockCompressedBuildContext
7677
ICompressHandler* compressionHandler = nullptr;
7778
StringBuffer compressionOptions;
7879
CompressionMethod compressionMethod = COMPRESS_METHOD_ZSTDS;
80+
bool zeroFilePos = false;
7981
};
8082

8183
class jhtree_decl CBlockCompressedWriteNode : public CWriteNode

0 commit comments

Comments
 (0)