Skip to content

Commit 2fd5141

Browse files
authored
Add memory resources to I/O modules (#20136)
Contributes to #15170

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #20136
1 parent 06427ab commit 2fd5141

File tree

26 files changed

+146
-34
lines changed

26 files changed

+146
-34
lines changed

python/pylibcudf/pylibcudf/io/avro.pxd

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
22
from rmm.pylibrmm.stream cimport Stream
3+
from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
34

45
from pylibcudf.io.types cimport SourceInfo, TableWithMetadata
56

@@ -25,4 +26,6 @@ cdef class AvroReaderOptionsBuilder:
2526
cpdef AvroReaderOptionsBuilder num_rows(self, size_type num_rows)
2627
cpdef AvroReaderOptions build(self)
2728

28-
cpdef TableWithMetadata read_avro(AvroReaderOptions options, Stream stream = *)
29+
cpdef TableWithMetadata read_avro(
30+
AvroReaderOptions options, Stream stream = *, DeviceMemoryResource mr=*
31+
)

python/pylibcudf/pylibcudf/io/avro.pyi

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Copyright (c) 2024, NVIDIA CORPORATION.
2+
from rmm.pylibrmm.memory_resource import DeviceMemoryResource
23
from rmm.pylibrmm.stream import Stream
34

45
from pylibcudf.io.types import SourceInfo, TableWithMetadata
@@ -16,5 +17,7 @@ class AvroReaderOptionsBuilder:
1617
def build(self) -> AvroReaderOptions: ...
1718

1819
def read_avro(
19-
options: AvroReaderOptions, stream: Stream = None
20+
options: AvroReaderOptions,
21+
stream: Stream = None,
22+
mr: DeviceMemoryResource = None,
2023
) -> TableWithMetadata: ...

python/pylibcudf/pylibcudf/io/avro.pyx

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ from libcpp.utility cimport move
55
from libcpp.vector cimport vector
66

77
from rmm.pylibrmm.stream cimport Stream
8+
from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
89

910
from pylibcudf.io.types cimport SourceInfo, TableWithMetadata
1011

@@ -15,7 +16,7 @@ from pylibcudf.libcudf.io.avro cimport (
1516

1617
from pylibcudf.libcudf.types cimport size_type
1718

18-
from pylibcudf.utils cimport _get_stream
19+
from pylibcudf.utils cimport _get_stream, _get_memory_resource
1920

2021

2122
__all__ = ["read_avro", "AvroReaderOptions", "AvroReaderOptionsBuilder"]
@@ -136,6 +137,7 @@ cdef class AvroReaderOptionsBuilder:
136137
cpdef TableWithMetadata read_avro(
137138
AvroReaderOptions options,
138139
Stream stream = None,
140+
DeviceMemoryResource mr=None,
139141
):
140142
"""
141143
Read from Avro format.
@@ -151,9 +153,12 @@ cpdef TableWithMetadata read_avro(
151153
Settings for controlling reading behavior
152154
stream : Stream | None
153155
CUDA stream used for device memory operations and kernel launches
156+
mr : DeviceMemoryResource, optional
157+
Device memory resource used to allocate the returned table's device memory.
154158
"""
155159
cdef Stream s = _get_stream(stream)
160+
mr = _get_memory_resource(mr)
156161
with nogil:
157-
c_result = move(cpp_read_avro(options.c_obj, s.view()))
162+
c_result = move(cpp_read_avro(options.c_obj, s.view(), mr.get_mr()))
158163

159164
return TableWithMetadata.from_libcudf(c_result, s)

python/pylibcudf/pylibcudf/io/csv.pxd

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ from libcpp.string cimport string
44
from libcpp.vector cimport vector
55

66
from rmm.pylibrmm.stream cimport Stream
7+
from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
78

89
from pylibcudf.io.types cimport SinkInfo, SourceInfo, TableWithMetadata
910

@@ -70,7 +71,9 @@ cdef class CsvReaderOptionsBuilder:
7071
cpdef CsvReaderOptionsBuilder delimiter(self, str delimiter)
7172
cpdef CsvReaderOptions build(self)
7273

73-
cpdef TableWithMetadata read_csv(CsvReaderOptions options, Stream stream = *)
74+
cpdef TableWithMetadata read_csv(
75+
CsvReaderOptions options, Stream stream = *, DeviceMemoryResource mr=*
76+
)
7477

7578
cdef class CsvWriterOptions:
7679
cdef csv_writer_options c_obj

python/pylibcudf/pylibcudf/io/csv.pyi

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from typing_extensions import Self
44

5+
from rmm.pylibrmm.memory_resource import DeviceMemoryResource
56
from rmm.pylibrmm.stream import Stream
67

78
from pylibcudf.io.types import (
@@ -59,8 +60,9 @@ class CsvReaderOptionsBuilder:
5960
def read_csv(
6061
options: CsvReaderOptions,
6162
stream: Stream = None,
63+
mr: DeviceMemoryResource = None,
6264
) -> TableWithMetadata: ...
63-
def write_csv(options: CsvWriterOptionsBuilder, stream: Stream = None): ...
65+
def write_csv(options: CsvWriterOptions, stream: Stream = None): ...
6466

6567
class CsvWriterOptions:
6668
def __init__(self): ...

python/pylibcudf/pylibcudf/io/csv.pyx

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ from libcpp.utility cimport move
77
from libcpp.vector cimport vector
88

99
from rmm.pylibrmm.stream cimport Stream
10+
from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
1011

1112
from pylibcudf.io.types cimport SourceInfo, SinkInfo, TableWithMetadata
1213

@@ -30,7 +31,7 @@ from pylibcudf.table cimport Table
3031

3132
from pylibcudf.types cimport DataType
3233

33-
from pylibcudf.utils cimport _get_stream
34+
from pylibcudf.utils cimport _get_stream, _get_memory_resource
3435

3536
__all__ = [
3637
"read_csv",
@@ -656,6 +657,7 @@ cdef class CsvReaderOptionsBuilder:
656657
cpdef TableWithMetadata read_csv(
657658
CsvReaderOptions options,
658659
Stream stream = None,
660+
DeviceMemoryResource mr=None,
659661
):
660662
"""
661663
Read from CSV format.
@@ -671,11 +673,14 @@ cpdef TableWithMetadata read_csv(
671673
Settings for controlling reading behavior
672674
stream : Stream | None
673675
CUDA stream used for device memory operations and kernel launches
676+
mr : DeviceMemoryResource, optional
677+
Device memory resource used to allocate the returned table's device memory.
674678
"""
675679
cdef table_with_metadata c_result
676680
cdef Stream s = _get_stream(stream)
681+
mr = _get_memory_resource(mr)
677682
with nogil:
678-
c_result = move(cpp_read_csv(options.c_obj, s.view()))
683+
c_result = move(cpp_read_csv(options.c_obj, s.view(), mr.get_mr()))
679684

680685
cdef TableWithMetadata tbl_meta = TableWithMetadata.from_libcudf(c_result, s)
681686
return tbl_meta

python/pylibcudf/pylibcudf/io/json.pxd

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,9 @@ cdef class JsonReaderOptionsBuilder:
7979
cpdef JsonReaderOptionsBuilder unquoted_control_chars(self, bool val)
8080
cpdef build(self)
8181

82-
cpdef TableWithMetadata read_json(JsonReaderOptions options, Stream stream = *)
82+
cpdef TableWithMetadata read_json(
83+
JsonReaderOptions options, Stream stream = *, DeviceMemoryResource mr = *
84+
)
8385

8486
cpdef TableWithMetadata read_json_from_string_column(
8587
Column input,
@@ -120,4 +122,5 @@ cpdef tuple chunked_read_json(
120122
JsonReaderOptions options,
121123
int chunk_size= *,
122124
Stream stream = *,
125+
DeviceMemoryResource mr = *,
123126
)

python/pylibcudf/pylibcudf/io/json.pyi

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ from typing import TypeAlias
44

55
from typing_extensions import Self
66

7+
from rmm.pylibrmm.memory_resource import DeviceMemoryResource
78
from rmm.pylibrmm.stream import Stream
89

910
from pylibcudf.column import Column
@@ -71,7 +72,9 @@ class JsonReaderOptionsBuilder:
7172
def build(self) -> JsonReaderOptions: ...
7273

7374
def read_json(
74-
options: JsonReaderOptions, stream: Stream = None
75+
options: JsonReaderOptions,
76+
stream: Stream = None,
77+
mr: DeviceMemoryResource = None,
7578
) -> TableWithMetadata: ...
7679
def read_json_from_string_column(
7780
input: Column,
@@ -81,6 +84,7 @@ def read_json_from_string_column(
8184
compression: CompressionType,
8285
recovery_mode: JSONRecoveryMode,
8386
stream: Stream = None,
87+
mr: DeviceMemoryResource = None,
8488
) -> TableWithMetadata: ...
8589

8690
class JsonWriterOptions:
@@ -104,5 +108,6 @@ def chunked_read_json(
104108
options: JsonReaderOptions,
105109
chunk_size: int = 100_000_000,
106110
stream: Stream = None,
111+
mr: DeviceMemoryResource = None,
107112
) -> tuple[list[Column], list[str], ChildNameToTypeMap]: ...
108113
def is_supported_write_json(type: DataType) -> bool: ...

python/pylibcudf/pylibcudf/io/json.pyx

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -689,6 +689,7 @@ cpdef tuple chunked_read_json(
689689
JsonReaderOptions options,
690690
int chunk_size=100_000_000,
691691
Stream stream = None,
692+
DeviceMemoryResource mr = None,
692693
):
693694
"""
694695
Reads chunks of a JSON file into a :py:class:`~.types.TableWithMetadata`.
@@ -718,14 +719,15 @@ cpdef tuple chunked_read_json(
718719
child_names = None
719720
i = 0
720721
cdef Stream s = _get_stream(stream)
722+
mr = _get_memory_resource(mr)
721723
while True:
722724
options.enable_lines(True)
723725
options.set_byte_range_offset(c_range_size * i)
724726
options.set_byte_range_size(c_range_size)
725727

726728
try:
727729
with nogil:
728-
c_result = move(cpp_read_json(options.c_obj, s.view()))
730+
c_result = move(cpp_read_json(options.c_obj, s.view(), mr.get_mr()))
729731
except (ValueError, OverflowError):
730732
break
731733
if meta_names is None:
@@ -754,7 +756,8 @@ cpdef tuple chunked_read_json(
754756

755757
cpdef TableWithMetadata read_json(
756758
JsonReaderOptions options,
757-
Stream stream = None
759+
Stream stream = None,
760+
DeviceMemoryResource mr = None
758761
):
759762
"""
760763
Read from JSON format.
@@ -778,8 +781,9 @@ cpdef TableWithMetadata read_json(
778781
"""
779782
cdef table_with_metadata c_result
780783
cdef Stream s = _get_stream(stream)
784+
mr = _get_memory_resource(mr)
781785
with nogil:
782-
c_result = move(cpp_read_json(options.c_obj, s.view()))
786+
c_result = move(cpp_read_json(options.c_obj, s.view(), mr.get_mr()))
783787

784788
return TableWithMetadata.from_libcudf(c_result, s)
785789

python/pylibcudf/pylibcudf/io/orc.pxd

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ from libcpp.string cimport string
99
from libcpp.vector cimport vector
1010

1111
from rmm.pylibrmm.stream cimport Stream
12+
from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource
1213

1314
from pylibcudf.io.types cimport (
1415
SourceInfo,
@@ -61,7 +62,9 @@ cdef class OrcReaderOptionsBuilder:
6162
cpdef OrcReaderOptionsBuilder use_index(self, bool use)
6263
cpdef OrcReaderOptions build(self)
6364

64-
cpdef TableWithMetadata read_orc(OrcReaderOptions options)
65+
cpdef TableWithMetadata read_orc(
66+
OrcReaderOptions options, Stream stream = *, DeviceMemoryResource mr=*
67+
)
6568

6669
cdef class OrcColumnStatistics:
6770
cdef optional[uint64_t] number_of_values_c

0 commit comments

Comments
 (0)