-
Notifications
You must be signed in to change notification settings - Fork 976
Remove unnecessary synchronization (miss-sync) during Parquet reading (Part 4: vector_factories) #20120
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: branch-25.12
Are you sure you want to change the base?
Remove unnecessary synchronization (miss-sync) during Parquet reading (Part 4: vector_factories) #20120
Changes from all commits
4c860ec
28bb730
2febabb
c7ad2e8
64f98b5
2a1e294
4ccf8d9
b2c4e0c
81acfd5
c2feb39
ac5a34e
045e9aa
852e64e
4c8591b
09ed07e
4b1b0b5
1bb499f
9c537b1
7d462c5
b739be1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This links back to the draft PR for reference, but covering the full change in |
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Change the type of There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So no need to change the |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -418,8 +418,8 @@ void reader_impl::decode_page_data(read_mode mode, | |
// that it is difficult/impossible for a given page to know that it is writing the very | ||
// last value that should then be followed by a terminator (because rows can span | ||
// page boundaries). | ||
std::vector<size_type*> out_buffers; | ||
std::vector<size_type> final_offsets; | ||
auto out_buffers = cudf::detail::make_host_vector<size_type*>(0, _stream); | ||
auto final_offsets = cudf::detail::make_host_vector<size_type>(0, _stream); | ||
Comment on lines
+421
to
+422
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note: The reason I raise this is that most existing uses of |
||
out_buffers.reserve(_input_columns.size()); | ||
final_offsets.reserve(_input_columns.size()); | ||
for (size_t idx = 0; idx < _input_columns.size(); idx++) { | ||
|
@@ -437,14 +437,14 @@ void reader_impl::decode_page_data(read_mode mode, | |
|
||
// the final offset for a list at level N is the size of its child | ||
size_type const offset = child.type.id() == type_id::LIST ? child.size - 1 : child.size; | ||
out_buffers.emplace_back(static_cast<size_type*>(out_buf.data()) + (out_buf.size - 1)); | ||
final_offsets.emplace_back(offset); | ||
out_buffers.push_back(static_cast<size_type*>(out_buf.data()) + (out_buf.size - 1)); | ||
final_offsets.push_back(offset); | ||
out_buf.user_data |= PARQUET_COLUMN_BUFFER_FLAG_LIST_TERMINATED; | ||
} else if (out_buf.type.id() == type_id::STRING) { | ||
// only if it is not a large strings column | ||
if (std::cmp_less_equal(col_string_sizes[idx], strings::detail::get_offset64_threshold())) { | ||
out_buffers.emplace_back(static_cast<size_type*>(out_buf.data()) + out_buf.size); | ||
final_offsets.emplace_back(static_cast<size_type>(col_string_sizes[idx])); | ||
out_buffers.push_back(static_cast<size_type*>(out_buf.data()) + out_buf.size); | ||
final_offsets.push_back(static_cast<size_type>(col_string_sizes[idx])); | ||
} | ||
// Nested large strings column | ||
else if (input_col.nesting_depth() > 0) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This links back to the draft PR for reference:
https://github.com/rapidsai/cudf/pull/18968/files#diff-b281f280563cbbee7c16afb29ef989d808476a355c9c36a8f4e27fc5dc2ca4fd