@@ -890,20 +890,21 @@ bool ResultSet::isDirectColumnarConversionPossible() const {
890890bool ResultSet::isZeroCopyColumnarConversionPossible (size_t column_idx) const {
891891 return query_mem_desc_.didOutputColumnar () &&
892892 query_mem_desc_.getQueryDescriptionType () == QueryDescriptionType::Projection &&
893- appended_storage_.empty () && storage_ &&
893+ ! colType (column_idx)-> isVarLen () && appended_storage_.empty () && storage_ &&
894894 (lazy_fetch_info_.empty () || !lazy_fetch_info_[column_idx].is_lazily_fetched );
895895}
896896
897897bool ResultSet::isChunkedZeroCopyColumnarConversionPossible (size_t column_idx) const {
898898 return query_mem_desc_.didOutputColumnar () &&
899899 query_mem_desc_.getQueryDescriptionType () == QueryDescriptionType::Projection &&
900- storage_ &&
900+ ! colType (column_idx)-> isVarLen () && storage_ &&
901901 (lazy_fetch_info_.empty () || !lazy_fetch_info_[column_idx].is_lazily_fetched );
902902}
903903
904904const int8_t * ResultSet::getColumnarBuffer (size_t column_idx) const {
905905 CHECK (isZeroCopyColumnarConversionPossible (column_idx));
906- return storage_->getUnderlyingBuffer () + query_mem_desc_.getColOffInBytes (column_idx);
906+ size_t slot_idx = query_mem_desc_.getSlotIndexForSingleSlotCol (column_idx);
907+ return storage_->getUnderlyingBuffer () + query_mem_desc_.getColOffInBytes (slot_idx);
907908}
908909
909910std::vector<std::pair<const int8_t *, size_t >> ResultSet::getChunkedColumnarBuffer (
@@ -917,14 +918,15 @@ std::vector<std::pair<const int8_t*, size_t>> ResultSet::getChunkedColumnarBuffe
917918 size_t rows_to_skip = drop_first_;
918919 // RowCount value should be cached and take into account size, limit and offset
919920 size_t rows_to_fetch = rowCount ();
921+ size_t slot_idx = query_mem_desc_.getSlotIndexForSingleSlotCol (column_idx);
920922
921923 if (current_storage_rows <= rows_to_skip) {
922924 rows_to_skip -= current_storage_rows;
923925 } else {
924926 size_t fetch_from_current_storage =
925927 std::min (current_storage_rows - rows_to_skip, rows_to_fetch);
926928 retval.emplace_back (storage_->getUnderlyingBuffer () +
927- storage_->getColOffInBytes (column_idx ) +
929+ storage_->getColOffInBytes (slot_idx ) +
928930 colType (column_idx)->size () * rows_to_skip,
929931 fetch_from_current_storage);
930932 rows_to_fetch -= fetch_from_current_storage;
@@ -936,7 +938,7 @@ std::vector<std::pair<const int8_t*, size_t>> ResultSet::getChunkedColumnarBuffe
936938 break ;
937939 }
938940 const int8_t * ptr =
939- storage_uptr->getUnderlyingBuffer () + storage_uptr->getColOffInBytes (column_idx );
941+ storage_uptr->getUnderlyingBuffer () + storage_uptr->getColOffInBytes (slot_idx );
940942 current_storage_rows = storage_uptr->binSearchRowCount ();
941943 if (current_storage_rows <= rows_to_skip) {
942944 rows_to_skip -= current_storage_rows;
@@ -952,6 +954,132 @@ std::vector<std::pair<const int8_t*, size_t>> ResultSet::getChunkedColumnarBuffe
952954 return retval;
953955}
954956
957+ size_t ResultSet::computeVarLenOffsets (size_t col_idx, int32_t * offsets) const {
958+ auto type = colType (col_idx);
959+ CHECK (type->isVarLen ());
960+ size_t arr_elem_size =
961+ type->isVarLenArray () ? type->as <hdk::ir::ArrayBaseType>()->elemType ()->size () : 1 ;
962+ bool lazy_fetch =
963+ !lazy_fetch_info_.empty () && lazy_fetch_info_[col_idx].is_lazily_fetched ;
964+
965+ size_t data_slot_idx = 0 ;
966+ size_t data_slot_offs = 0 ;
967+ size_t size_slot_idx = 0 ;
968+ size_t size_slot_offs = 0 ;
969+ // Compute required slot index.
970+ for (size_t i = 0 ; i < col_idx; ++i) {
971+ // slot offset in a row is computed for rowwise access.
972+ if (!query_mem_desc_.didOutputColumnar ()) {
973+ data_slot_offs = advance_target_ptr_row_wise (data_slot_offs,
974+ targets_[i],
975+ data_slot_idx,
976+ query_mem_desc_,
977+ separate_varlen_storage_valid_);
978+ }
979+ data_slot_idx =
980+ advance_slot (data_slot_idx, targets_[i], separate_varlen_storage_valid_);
981+ }
982+ if (!separate_varlen_storage_valid_ && !lazy_fetch) {
983+ size_slot_offs =
984+ data_slot_offs + query_mem_desc_.getPaddedSlotWidthBytes (data_slot_idx);
985+ size_slot_idx = data_slot_idx + 1 ;
986+ } else {
987+ size_slot_idx = data_slot_idx;
988+ size_slot_offs = data_slot_offs;
989+ }
990+
991+ // Translate varlen value to its length. Return -1 for NULLs.
992+ auto slot_val_to_length = [this , lazy_fetch, col_idx, type](
993+ size_t storage_idx,
994+ int64_t val,
995+ const int8_t * size_slot_ptr,
996+ size_t size_slot_sz) -> int32_t {
997+ if (separate_varlen_storage_valid_ && !targets_[col_idx].is_agg ) {
998+ if (val >= 0 ) {
999+ const auto & varlen_buffer_for_storage = serialized_varlen_buffer_[storage_idx];
1000+ return varlen_buffer_for_storage[val].size ();
1001+ }
1002+ return -1 ;
1003+ }
1004+
1005+ if (lazy_fetch) {
1006+ auto & frag_col_buffers = getColumnFrag (storage_idx, col_idx, val);
1007+ bool is_end{false };
1008+ if (type->isString ()) {
1009+ VarlenDatum vd;
1010+ ChunkIter_get_nth (reinterpret_cast <ChunkIter*>(const_cast <int8_t *>(
1011+ frag_col_buffers[lazy_fetch_info_[col_idx].local_col_id ])),
1012+ val,
1013+ false ,
1014+ &vd,
1015+ &is_end);
1016+ CHECK (!is_end);
1017+ return vd.is_null ? -1 : vd.length ;
1018+ } else {
1019+ ArrayDatum ad;
1020+ ChunkIter_get_nth (reinterpret_cast <ChunkIter*>(const_cast <int8_t *>(
1021+ frag_col_buffers[lazy_fetch_info_[col_idx].local_col_id ])),
1022+ val,
1023+ &ad,
1024+ &is_end);
1025+ CHECK (!is_end);
1026+ return ad.is_null ? -1 : ad.length ;
1027+ }
1028+ }
1029+
1030+ if (val)
1031+ return read_int_from_buff (size_slot_ptr, size_slot_sz);
1032+ return -1 ;
1033+ };
1034+
1035+ offsets[0 ] = 0 ;
1036+ size_t row_idx = 0 ;
1037+ ResultSetRowIterator iter (this );
1038+ ++iter;
1039+ const auto data_elem_size = query_mem_desc_.getPaddedSlotWidthBytes (data_slot_idx);
1040+ const auto size_elem_size = query_mem_desc_.getPaddedSlotWidthBytes (size_slot_idx);
1041+ while (iter.global_entry_idx_valid_ ) {
1042+ const auto storage_lookup_result = findStorage (iter.global_entry_idx_ );
1043+ auto storage = storage_lookup_result.storage_ptr ;
1044+ auto local_entry_idx = storage_lookup_result.fixedup_entry_idx ;
1045+
1046+ const int8_t * elem_ptr = nullptr ;
1047+ const int8_t * size_ptr = nullptr ;
1048+ if (query_mem_desc_.didOutputColumnar ()) {
1049+ auto col_ptr =
1050+ storage->buff_ + storage->query_mem_desc_ .getColOffInBytes (data_slot_idx);
1051+ elem_ptr = col_ptr + data_elem_size * local_entry_idx;
1052+ auto size_col_ptr =
1053+ storage->buff_ + storage->query_mem_desc_ .getColOffInBytes (size_slot_idx);
1054+ size_ptr = size_col_ptr + size_elem_size * local_entry_idx;
1055+ } else {
1056+ auto keys_ptr = row_ptr_rowwise (storage->buff_ , query_mem_desc_, local_entry_idx);
1057+ const auto key_bytes_with_padding =
1058+ align_to_int64 (get_key_bytes_rowwise (query_mem_desc_));
1059+ elem_ptr = keys_ptr + key_bytes_with_padding + data_slot_offs;
1060+ size_ptr = keys_ptr + key_bytes_with_padding + size_slot_offs;
1061+ }
1062+
1063+ auto val = read_int_from_buff (elem_ptr, data_elem_size);
1064+ auto elem_length = slot_val_to_length (
1065+ storage_lookup_result.storage_idx , val, size_ptr, size_elem_size);
1066+ if (elem_length < 0 ) {
1067+ if (type->isString ()) {
1068+ offsets[row_idx + 1 ] = offsets[row_idx];
1069+ } else {
1070+ offsets[row_idx + 1 ] = -std::abs (offsets[row_idx]);
1071+ }
1072+ } else {
1073+ offsets[row_idx + 1 ] = std::abs (offsets[row_idx]) + elem_length * arr_elem_size;
1074+ }
1075+
1076+ ++iter;
1077+ ++row_idx;
1078+ }
1079+
1080+ return row_idx + 1 ;
1081+ }
1082+
9551083// Returns a bitmap (and total number) of all single slot targets
9561084std::tuple<std::vector<bool >, size_t > ResultSet::getSingleSlotTargetBitmap () const {
9571085 std::vector<bool > target_bitmap (targets_.size (), true );
@@ -976,7 +1104,8 @@ std::tuple<std::vector<bool>, size_t> ResultSet::getSingleSlotTargetBitmap() con
9761104 *
9771105 * The final goal is to remove the need for such selection, but at the moment for any
9781106 * target that doesn't qualify for direct columnarization, we use the traditional
979- * result set's iteration to handle it (e.g., count distinct, approximate count distinct)
1107+ * result set's iteration to handle it (e.g., count distinct, approximate count
1108+ * distinct)
9801109 */
9811110std::tuple<std::vector<bool >, size_t > ResultSet::getSupportedSingleSlotTargetBitmap ()
9821111 const {
0 commit comments